def guide(self, xs, ys=None):
    with pyro.plate("data"):
        batch_size = xs.size(0)
        # if the class label (the digit) is not supervised, sample
        # (and score) the digit with the variational distribution
        # q(y|x) = categorical(alpha(x))
        if ys is None:
            # for an unlabelled datapoint, we take the observations x
            # and output an alpha which parameterises the classifier.
            alpha = self.encoder_y.forward(xs)
            # then we sample a classification using this parameterisation.
            # the classifier is also like a generative model: given the
            # latents alpha, it outputs an observation y, where alpha is
            # produced by an encoder.
            ys = pyro.sample("y", dist.Multinomial(logits=alpha))
        # if the label y is known, we don't have to sample from the above;
        # we just feed the actual y into the encoder that takes x and y.
        # sample (and score) the latent handwriting style with the
        # variational distribution q(z|x,y) = normal(loc(x,y), scale(x,y))
        # TODO: change ys to one-hot (should do this somewhere else)
        loc, scale = self.encoder_z.forward(xs, ys)
        pyro.sample("z", dist.Normal(loc, scale).to_event(1))
def setUp(self):
    n = [[8], [5]]
    self.ps = Variable(torch.Tensor([[0.1, 0.6, 0.3], [0.4, 0.1, 0.5]]))
    self.n = Variable(torch.Tensor(n))
    # self.test_data = Variable(torch.Tensor([0, 0, 1, 1, 2, 1, 1, 2]))
    self.test_data = Variable(torch.Tensor([[2, 4, 2], [2, 0, 3]]))
    self.dist = dist.Multinomial(self.ps, self.n, batch_size=1)
def model(self, x):
    pyro.module("decoder_c", self.decoder_c)
    pyro.module("decoder_y", self.decoder_y)
    with pyro.plate("data", x.shape[0]):
        # x is (Outcome, Class, Age, Sex)
        # prior on U_c
        mean_c = x.new_zeros(torch.Size((x.shape[0], self.U_c_dim)))
        std_c = x.new_ones(torch.Size((x.shape[0], self.U_c_dim)))
        U_c = pyro.sample("U_c", dist.Normal(mean_c, std_c).to_event(1))
        # prior on U_y
        mean_y = x.new_zeros(torch.Size((x.shape[0], self.U_y_dim)))
        std_y = x.new_ones(torch.Size((x.shape[0], self.U_y_dim)))
        U_y = pyro.sample("U_y", dist.Normal(mean_y, std_y).to_event(1))
        # prior on Age
        mean_a = 29.7 * x.new_ones(torch.Size((x.shape[0], 1)))
        std_a = 14.5 * x.new_ones(torch.Size((x.shape[0], 1)))
        A = pyro.sample("Age", dist.Normal(mean_a, std_a).to_event(1))
        # prior on Sex
        prob_s = 0.6476 * x.new_ones(torch.Size((x.shape[0], 1)))
        S = pyro.sample("Sex", dist.Bernoulli(prob_s).to_event(1))
        # decode the latent code U_c into class probabilities
        C_probs = self.decoder_c(U_c, A, S)
        C = pyro.sample("Class",
                        dist.Multinomial(probs=C_probs).to_event(1),
                        obs=to_one_hot(x[:, 1], self.num_classes))
        C = one_hot_to_idx(C)
        # score against actual outcome
        Y = self.decoder_y(U_y, A, S, C)
        pyro.sample("Outcome",
                    dist.Bernoulli(Y).to_event(1),
                    obs=x[:, 0].reshape(-1, 1))
def model(self, peak_idx, read_depth, onehot_obs=None):
    pyro.module("decoder", self.decoder)
    with pyro.plate("cells", peak_idx.shape[0]):
        # Dirichlet prior 𝑝(𝜃|𝛼) is replaced by a log-normal distribution
        theta_loc = self.prior_mu * peak_idx.new_ones(
            (peak_idx.shape[0], self.num_topics))
        theta_scale = self.prior_std * peak_idx.new_ones(
            (peak_idx.shape[0], self.num_topics))
        theta = pyro.sample(
            "theta", dist.LogNormal(theta_loc, theta_scale).to_event(1))
        theta = theta / theta.sum(-1, keepdim=True)
        # conditional distribution of 𝑤𝑛 is defined as
        # 𝑤𝑛|𝛽,𝜃 ~ Categorical(𝜎(𝛽𝜃))
        peak_probs = self.decoder(theta)
        pyro.sample(
            'obs',
            dist.Multinomial(
                total_count=read_depth if onehot_obs is None else 1,
                probs=peak_probs).to_event(1),
            obs=onehot_obs)
def model(self, xs, y=None):
    # register this pytorch module and all of its sub-modules with pyro
    pyro.module("ss_vae", self)
    batch_size = xs.size(0)
    # inform Pyro that the variables in the batch of xs, ys are conditionally independent
    with pyro.plate("data"):
        # sample the handwriting style from the constant prior distribution
        prior_loc = xs.new_zeros([batch_size, self.z_dim])
        prior_scale = xs.new_ones([batch_size, self.z_dim])
        zs = pyro.sample("z", dist.Normal(prior_loc, prior_scale).to_event(1))
        # if the label y (which digit to write) is unsupervised, sample it
        # from the constant prior; otherwise, observe the given value
        # (i.e. score it against the constant prior)
        alpha_prior = xs.new_ones([batch_size, self.y_dim]) / (1.0 * self.y_dim)
        # a uniform prior: a vector of equal probabilities over the y_dim classes
        # labels are one-hot, as the Multinomial expects
        ys = pyro.sample("y", dist.Multinomial(logits=alpha_prior), obs=y)
        # one of the categories is sampled according to alpha_prior
        # finally, score the image (x) using the handwriting style (z) and
        # the class label y (which digit to write) against the
        # parametrized distribution p(x|y,z) = bernoulli(decoder(y,z))
        # where `decoder` is a neural network
        loc = self.decoder.forward(zs, ys)
        # the decoder network takes a category and a latent variable
        # and outputs an observation x
        pyro.sample("x", dist.Bernoulli(loc).to_event(3), obs=xs)
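# A minimal sketch of wiring an SS-VAE model/guide pair like those shown
# in this section into SVI. `SSVAE` and `loader` are hypothetical
# stand-ins; only the SVI plumbing itself is the point here.
import pyro
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam

ss_vae = SSVAE()  # hypothetical module exposing .model and .guide
svi = SVI(ss_vae.model, ss_vae.guide, Adam({"lr": 1e-3}), loss=Trace_ELBO())
for xs, ys in loader:  # hypothetical loader; ys is None for unlabelled batches
    loss = svi.step(xs, ys)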
def model(self, data):
    '''The generative distribution.'''
    pyro.module("decoder", self.decoder)
    # sample all the priors simultaneously
    with pyro.iarange("score_sample", len(self.vocab)):
        z = pyro.sample("latent_scores", dist.Dirichlet(self.alpha_prior))
    datasets = data.source.unique()
    # loop through the datasets
    for i in pyro.irange("data_loop", len(datasets)):
        dataset = datasets[i]
        subset = data.loc[data.source == dataset]
        sent = torch.tensor(subset.sent.values.tolist(), dtype=torch.float)
        if len(sent.shape) == 1:
            sent = sent.unsqueeze(-1)
        z_word = z[subset.word_id.values]
        rho = self.decoder.forward(z_word, dataset)
        if dataset in ['mpqa', 'huliu', 'general_inquirer']:
            pyro.sample(f"obs_{dataset}", dist.Bernoulli(rho), obs=sent)
        if dataset == 'vader':
            if self.vader_multinomial:
                pyro.sample(
                    f"obs_{dataset}",
                    dist.Multinomial(probs=rho, total_count=10),
                    obs=sent,
                )
            else:
                n = rho.size(0)
                batch = n // 20
                for j in pyro.irange("vader_chunks", 20):
                    pyro.sample(
                        f"obs_{dataset}_{j}",
                        dist.Categorical(rho[j * batch:(j + 1) * batch, :]),
                        # score the chunk of observations matching this chunk
                        # of rho; +4 shifts scores into valid category indices
                        obs=sent[j * batch:(j + 1) * batch].squeeze(-1) + 4.,
                    )
        if dataset == 'senticnet':
            loc, scale = rho
            pyro.sample(f"obs_{dataset}", dist.Normal(loc, scale), obs=sent)
        if dataset == 'sentiwordnet':
            loc, scale = rho
            pyro.sample(
                f"obs_{dataset}",
                dist.MultivariateNormal(loc, scale),
                obs=sent,
            )
def setUp(self):
    n = 8
    self.ps = Variable(torch.Tensor([0.1, 0.6, 0.3]))
    self.n = Variable(torch.Tensor([n]))
    # self.test_data = Variable(torch.Tensor([0, 0, 1, 1, 2, 1, 1, 2]))
    self.test_data = Variable(torch.Tensor([2, 4, 2]))
    self.dist = dist.Multinomial(self.ps, self.n)
    self.analytic_mean = n * self.ps
    one = Variable(torch.ones(3))
    self.analytic_var = n * torch.mul(self.ps, one.sub(self.ps))
    self.n_samples = 50000
def model(data):
    # define the parameters for the 6-sided die.
    # A Dirichlet prior is a standard non-informative prior for a
    # multinomial distribution. Such a prior is useful when there are no
    # existing beliefs about the distribution of the latent variables.
    f = pyro.sample("latent_fairness", dist.Dirichlet(torch.ones(6)))
    for i in range(len(data)):
        # observe datapoint i using the multinomial likelihood,
        # i.e. a die having 6 faces.
        pyro.sample("obs_{}".format(i), dist.Multinomial(probs=f), obs=data[i])
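# A minimal inference sketch for the die model above, assuming each
# datapoint is a single one-hot roll (Multinomial's total_count defaults
# to 1). The rolls and hyperparameters here are made up for illustration.
import torch
import pyro
import pyro.distributions as dist
from pyro.infer import SVI, Trace_ELBO
from pyro.infer.autoguide import AutoDiagonalNormal
from pyro.optim import Adam

rolls = torch.eye(6)[torch.tensor([0, 3, 3, 5, 1, 3])]  # six one-hot rolls
guide = AutoDiagonalNormal(model)
svi = SVI(model, guide, Adam({"lr": 0.05}), loss=Trace_ELBO())
for _ in range(100):
    svi.step(rolls)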
def test_dirichlet_multinomial(sample_shape, batch_shape):
    concentration = torch.randn(batch_shape + (3,)).exp()
    total = 10
    probs = torch.tensor([0.2, 0.3, 0.5])
    obs = dist.Multinomial(total, probs).sample(sample_shape + batch_shape)
    f = dist.Dirichlet(concentration)
    g = dist.Dirichlet(1 + obs)
    fg, log_normalizer = f.conjugate_update(g)
    x = fg.sample(sample_shape)
    assert_close(f.log_prob(x) + g.log_prob(x), fg.log_prob(x) + log_normalizer)
def forward(self, x, y=None):
    # latent variable
    z = self.linear_layer(x)
    z = torch.nn.functional.relu(z)
    z = self.output_layer(z)
    z = torch.nn.functional.softmax(z, dim=1)
    # likelihood: the plate declares each datapoint in the batch
    # conditionally independent
    with pyro.plate("data", size=x.shape[0], dim=-1):
        # z parameterises the Multinomial over the observed counts
        obs = pyro.sample("obs", D.Multinomial(probs=z), obs=y)
    # return latent variable
    return z
def model(data):
    s0 = (nd, nz, 1, Td)
    s1 = (nz, 1, nw, ntr)
    alpha0 = torch.ones(*s0).cpu()
    alpha1 = torch.ones(*s1).cpu()
    z = pyro.sample("latent0", pdist.Dirichlet(concentration=alpha0.view(nd, -1)))
    motifs = pyro.sample("latent1", pdist.Dirichlet(concentration=alpha1.view(nz, -1)))
    z = z.reshape(*s0)
    motifs = motifs.reshape(*s1)
    p = p_w_ta_d(z, motifs)
    with pyro.iarange("data", len(data)):
        zts = pyro.sample("zts", pdist.Categorical(probs=z))
        pyro.sample("observe", pdist.Multinomial(probs=p), obs=data)
def test_dirichlet_multinomial_log_prob(total_count, batch_shape, is_sparse):
    event_shape = (3,)
    concentration = torch.rand(batch_shape + event_shape).exp()
    # test on one-hots
    value = total_count * torch.eye(3).reshape(
        event_shape + (1,) * len(batch_shape) + event_shape)
    num_samples = 100000
    probs = dist.Dirichlet(concentration).sample((num_samples, 1))
    log_probs = dist.Multinomial(total_count, probs).log_prob(value)
    assert log_probs.shape == (num_samples,) + event_shape + batch_shape
    expected = log_probs.logsumexp(0) - math.log(num_samples)
    actual = DirichletMultinomial(concentration, total_count, is_sparse).log_prob(value)
    assert_close(actual, expected, atol=0.05)
def pyrocov_model_relaxed(dataset):
    # Tensor shapes are commented at the end of some lines.
    features = dataset["features"]
    local_time = dataset["local_time"][..., None]  # [T, P, 1]
    T, P, _ = local_time.shape
    S, F = features.shape
    weekly_strains = dataset["weekly_strains"]
    assert weekly_strains.shape == (T, P, S)

    # Sample global random variables.
    coef_scale = pyro.sample("coef_scale", dist.InverseGamma(5e3, 1e2))[..., None]
    rate_loc_scale = pyro.sample("rate_loc_scale", dist.LogNormal(-4, 2))[..., None]
    rate_scale = pyro.sample("rate_scale", dist.LogNormal(-4, 2))[..., None]
    init_loc_scale = pyro.sample("init_loc_scale", dist.LogNormal(0, 2))[..., None]
    init_scale = pyro.sample("init_scale", dist.LogNormal(0, 2))[..., None]

    # Assume relative growth rate depends strongly on mutations and weakly on place.
    coef_loc = torch.zeros(F)
    coef = pyro.sample("coef", dist.Logistic(coef_loc, coef_scale).to_event(1))  # [F]
    rate_loc = pyro.sample(
        "rate_loc",
        dist.Normal(0.01 * coef @ features.T, rate_loc_scale).to_event(1),
    )  # [S]

    # Assume initial infections depend strongly on strain and place.
    init_loc = pyro.sample(
        "init_loc", dist.Normal(torch.zeros(S), init_loc_scale).to_event(1)
    )  # [S]
    with pyro.plate("place", P, dim=-1):
        rate = pyro.sample(
            "rate", dist.Normal(rate_loc, rate_scale).to_event(1)
        )  # [P, S]
        init = pyro.sample(
            "init", dist.Normal(init_loc, init_scale).to_event(1)
        )  # [P, S]

        # Finally observe counts.
        with pyro.plate("time", T, dim=-2):
            logits = init + rate * local_time  # [T, P, S]
            pyro.sample(
                "obs",
                dist.Multinomial(logits=logits, validate_args=False),
                obs=weekly_strains,
            )
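# Note on `validate_args=False` above: torch's Multinomial assumes a single
# homogeneous `total_count`, but each (time, place) row of `weekly_strains`
# can sum to a different total. Multinomial.log_prob() computes its result
# from the observed counts alone, without reading `total_count`, so
# disabling validation presumably lets one distribution score rows with
# varying totals. A small self-contained check of that property:
import torch
import pyro.distributions as dist

logits = torch.zeros(3)
counts = torch.tensor([[1.0, 2.0, 0.0], [4.0, 0.0, 3.0]])  # totals 3 and 7
d = dist.Multinomial(total_count=1, logits=logits, validate_args=False)
print(d.log_prob(counts))  # each row is scored under its own total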
def model(self, docs=None):
    # register PyTorch module `decoder` with Pyro
    pyro.module("decoder", self.decoder)
    with pyro.plate("documents", docs.shape[0]):
        # Dirichlet prior 𝑝(𝜃|𝛼) is replaced by a log-normal distribution
        theta_loc = docs.new_zeros((docs.shape[0], self.num_topics))
        theta_scale = docs.new_ones((docs.shape[0], self.num_topics))
        theta = pyro.sample(
            "theta", dist.LogNormal(theta_loc, theta_scale).to_event(1))
        theta = theta / theta.sum(-1, keepdim=True)
        # conditional distribution of 𝑤𝑛 is defined as
        # 𝑤𝑛|𝛽,𝜃 ~ Categorical(𝜎(𝛽𝜃))
        count_param = self.decoder(theta)
        pyro.sample('obs',
                    dist.Multinomial(docs.shape[1], count_param).to_event(1),
                    obs=docs)
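# A sketch of a matching amortized guide for the model above, assuming a
# hypothetical `self.encoder` network that maps a document's word counts
# to the parameters of q(theta | docs); the encoder name and its output
# convention (location and log-variance) are illustrative assumptions.
def guide(self, docs=None):
    pyro.module("encoder", self.encoder)
    with pyro.plate("documents", docs.shape[0]):
        # the encoder outputs the location and log-variance of a
        # log-normal variational posterior over theta
        theta_loc, theta_logvar = self.encoder(docs)
        theta_scale = (0.5 * theta_logvar).exp()
        pyro.sample("theta",
                    dist.LogNormal(theta_loc, theta_scale).to_event(1))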
def model(self, doc_sum=None):
    # register PyTorch module `recognition_net` with Pyro
    pyro.module("recognition_net", self.recognition_net)
    with pyro.plate("documents", doc_sum.shape[0]):
        # setup hyperparameters
        theta_loc = doc_sum.new_zeros((doc_sum.shape[0], self.num_topics))
        theta_scale = doc_sum.new_ones((doc_sum.shape[0], self.num_topics))
        # sample from the prior (the value will be sampled by the guide
        # when computing the ELBO)
        theta = pyro.sample(
            "theta",
            dist.LogNormal(theta_loc, (0.5 * theta_scale).exp()).to_event(1))
        theta = theta / theta.sum(1, keepdim=True)
        count_param = self.recognition_net(theta)
        pyro.sample('obs',
                    dist.Multinomial(doc_sum.shape[1], count_param).to_event(1),
                    obs=doc_sum)
def model(data):
    # ADD: factor out the shapes
    # NB: this is just the initialization
    s0 = (nd, nz, 1, Td)
    s1 = (nz, 1, nw, ntr)
    alpha0 = torch.ones(*s0)
    alpha1 = torch.ones(*s1)
    # CHANGE: use the fact that Dirichlet can draw independent Dirichlets
    # TODO: try "get_param"
    z = pyro.sample("latent0", pdist.Dirichlet(concentration=alpha0.view(nd, -1)))
    motifs = pyro.sample("latent1", pdist.Dirichlet(concentration=alpha1.view(nz, -1)))
    # ADD: resize z and motifs
    z = z.reshape(*s0)
    motifs = motifs.reshape(*s1)
    with pyro.iarange("data", len(data)):
        # CHANGE: make explicit that the total count of the observations
        # is unused here (Multinomial.log_prob ignores it)
        pyro.sample("observe",
                    pdist.Multinomial(-999, probs=p_w_ta_d(z, motifs)),
                    obs=data)
def pyrocov_model_plated(dataset):
    # Tensor shapes are commented at the end of some lines.
    features = dataset["features"]
    local_time = dataset["local_time"][..., None]  # [T, P, 1]
    T, P, _ = local_time.shape
    S, F = features.shape
    weekly_strains = dataset["weekly_strains"]  # [T, P, S]
    assert weekly_strains.shape == (T, P, S)
    feature_plate = pyro.plate("feature", F, dim=-1)
    strain_plate = pyro.plate("strain", S, dim=-1)
    place_plate = pyro.plate("place", P, dim=-2)
    time_plate = pyro.plate("time", T, dim=-3)

    # Sample global random variables.
    coef_scale = pyro.sample("coef_scale", dist.InverseGamma(5e3, 1e2))
    rate_loc_scale = pyro.sample("rate_loc_scale", dist.LogNormal(-4, 2))
    rate_scale = pyro.sample("rate_scale", dist.LogNormal(-4, 2))
    init_loc_scale = pyro.sample("init_loc_scale", dist.LogNormal(0, 2))
    init_scale = pyro.sample("init_scale", dist.LogNormal(0, 2))

    with feature_plate:
        coef = pyro.sample("coef", dist.Logistic(0, coef_scale))  # [F]
    rate_loc_loc = 0.01 * coef @ features.T
    with strain_plate:
        rate_loc = pyro.sample(
            "rate_loc", dist.Normal(rate_loc_loc, rate_loc_scale)
        )  # [S]
        init_loc = pyro.sample("init_loc", dist.Normal(0, init_loc_scale))  # [S]
    with place_plate, strain_plate:
        rate = pyro.sample("rate", dist.Normal(rate_loc, rate_scale))  # [P, S]
        init = pyro.sample("init", dist.Normal(init_loc, init_scale))  # [P, S]

    # Finally observe counts.
    with time_plate, place_plate:
        logits = (init + rate * local_time)[..., None, :]  # [T, P, 1, S]
        pyro.sample(
            "obs",
            dist.Multinomial(logits=logits, validate_args=False),
            obs=weekly_strains[..., None, :],
        )
def model(self, data):
    # ADD: factor out the shapes
    # NB: this is just the initialization
    motifs_starting_times_shape = (self.documents_number,
                                   self.latent_motifs_number, 1,
                                   self.adjusted_documents_length)
    motifs_shape = (self.latent_motifs_number, 1, self.words_number,
                    self.relative_time_length)
    motifs_starting_times_concentration = torch.ones(
        *motifs_starting_times_shape)
    motifs_concentration = torch.ones(*motifs_shape)
    # CHANGE: use the fact that Dirichlet can draw independent Dirichlets
    # TODO: try "get_param"
    motifs_starting_times = pyro.sample(
        "motifs_starting_times",
        pdist.Dirichlet(
            concentration=motifs_starting_times_concentration.view(
                self.documents_number, -1)))
    motifs = pyro.sample(
        "motifs",
        pdist.Dirichlet(concentration=motifs_concentration.view(
            self.latent_motifs_number, -1)))
    # ADD: resize motifs_starting_times and motifs
    motifs_starting_times = motifs_starting_times.reshape(
        *motifs_starting_times_shape)
    motifs = motifs.reshape(*motifs_shape)
    with pyro.plate("data", len(data), subsample_size=100):
        # CHANGE: make explicit that the total count of the observations
        # is unused here (Multinomial.log_prob ignores it)
        pyro.sample("observe",
                    pdist.Multinomial(-999,
                                      probs=self.p_w_ta_d(
                                          motifs_starting_times, motifs)),
                    obs=data)
def multinomial_to_data(funsor_dist, name_to_dim=None):
    probs = to_data(funsor_dist.probs, name_to_dim)
    total_count = to_data(funsor_dist.total_count, name_to_dim)
    if isinstance(total_count, numbers.Number) or len(total_count.shape) == 0:
        return dist.Multinomial(int(total_count), probs=probs)
    raise NotImplementedError("inhomogeneous total_count not supported")
def MultinomialLogit(_name, n, l):
    return {'x': pyro.sample(_name, dist.Multinomial(n, logits=l))}
def Multinomial(_name, n, p):
    return {'x': pyro.sample(_name, dist.Multinomial(n, p))}
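# Hypothetical usage of the two wrapper functions above; the site names,
# total count, and probabilities are made-up illustrative values. Outside
# an inference context, pyro.sample simply draws from the distribution.
import torch

probs = torch.tensor([0.2, 0.3, 0.5])
counts = Multinomial("counts", 10, probs)["x"]                    # counts summing to 10
counts_logit = MultinomialLogit("counts_logit", 10, probs.log())["x"]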
def multinomial_loss(probs, values):
    return torch.sum(-1 * D.Multinomial(1, probs=probs).log_prob(values.float()))
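# Illustrative usage of multinomial_loss: with total_count=1 and one-hot
# targets, it reduces to a summed categorical cross-entropy. The values
# here are made up.
import torch
import torch.distributions as D

probs = torch.tensor([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]])
targets = torch.tensor([[1, 0, 0], [0, 1, 0]])
print(multinomial_loss(probs, targets))  # equals -(log 0.7 + log 0.8)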