def _main(self, ratings, validation, jit=False, num_samples=30,
          warmup_steps=5, num_chains=1):
    # Build a dense (n_user x n_item) rating matrix from the triplet list.
    ratingstmp = np.zeros((self.n_user, self.n_item), dtype='float64')
    for rat in ratings:
        ratingstmp[rat[0], rat[1]] = rat[2]

    nuts_kernel = NUTS(self._conditioned_model, jit_compile=jit)
    posterior = MCMC(nuts_kernel,
                     num_samples=num_samples,
                     warmup_steps=warmup_steps,
                     num_chains=num_chains,
                     disable_progbar=False).run(self._model, ratingstmp)

    # Collect the latent sites: global means plus per-user/per-item features.
    # (range(), not the Python 2 xrange().)
    sites = (['mu_item', 'mu_user']
             + ['u_temp_feature' + str(user_id) for user_id in range(self.n_user)]
             + ['i_temp_feature' + str(item_id) for item_id in range(self.n_item)])
    marginal = posterior.marginal(sites=sites)
    marginal = torch.cat(list(marginal.support(flatten=True).values()), dim=-1)

    num_of_res = (2 + int(self.n_user) + int(self.n_item)) * int(self.n_feature)
    print("shape : " + str(marginal.shape))
    print("num_of_res : " + str(num_of_res))

    # Use the last posterior sample as a point estimate of the features.
    marginal = marginal[num_samples - 1]
    marginal = torch.reshape(
        marginal,
        (2 + int(self.n_user) + int(self.n_item), int(self.n_feature)))
    ufmatrix = marginal[2:2 + int(self.n_user)].detach().numpy()
    ifmatrix = marginal[2 + int(self.n_user):].detach().numpy()

    # Predicted ratings, clipped to the valid rating range.
    Y = np.matmul(ufmatrix, ifmatrix.transpose())
    Y = np.clip(Y, self.min_rating, self.max_rating)

    output_file.write("item feature : \n")
    output_file.write(str(ifmatrix) + "\n")
    output_file.write("user feature : \n")
    output_file.write(str(ufmatrix) + "\n")
    output_file.write("Y : \n")
    output_file.write(str(Y) + "\n")

    # RMSE on the held-out validation triplets.
    num_of_rat = 0
    summ = 0.0
    for rat in validation:
        num_of_rat += 1
        summ += np.power(Y[rat[0], rat[1]] - rat[2], 2)
    rmse = np.power(summ / num_of_rat, 0.5)
    output_file.write("RMSE: " + str(rmse))
    print("RMSE: " + str(rmse))
    fo.write('\n')
    fo.close()
    nn_model = ADDAModel(args.model)
    label = args.label
    return nn_model, label, save_fn


if __name__ == '__main__':
    set_start_method('spawn')
    # fashion_mnist labels
    # [0: 'T-shirt', 1: 'Trouser', 2: 'Pullover', 3: 'Dress', 4: 'Coat',
    #  5: 'Sandal', 6: 'Shirt', 7: 'Sneaker', 8: 'Bag', 9: 'Ankle boot']
    parser = argparse.ArgumentParser()
    parser.add_argument('--num-samples', type=int, default=1000)
    parser.add_argument('--num-warmups', type=int, default=1000)
    parser.add_argument('--num-chains', type=int, default=1)
    parser.add_argument('--overwrite', action='store_true')
    parser.add_argument('--model')  # 'baseline' or 'adda'
    parser.add_argument('--label', type=int)
    args = parser.parse_args()

    nn_model, label, save_fn = setting(args)
    nuts = NUTS(program_da)
    mcmc = MCMC(nuts,
                num_samples=args.num_samples,
                warmup_steps=args.num_warmups,
                num_chains=args.num_chains)
    mcmc.run(nn_model, label)
    zs = mcmc.get_samples()['z'].detach().cpu().numpy()
    np.savetxt(save_fn, zs)
def run_inference(data, gen_model, ode_model, method, iterations=10000,
                  num_particles=1, num_samples=1000, warmup_steps=500,
                  init_scale=0.1, seed=12, lr=0.5,
                  return_sites=("_RETURN",)):  # trailing comma: a 1-tuple, not a bare string
    torch_data = torch.tensor(data, dtype=torch.float)

    # Pick the ODE sensitivity op matching the model's Jacobian strategy.
    if isinstance(ode_model, (ForwardSensManualJacobians,
                              ForwardSensTorchJacobians)):
        ode_op = ForwardSensOp
    elif isinstance(ode_model, (AdjointSensManualJacobians,
                                AdjointSensTorchJacobians)):
        ode_op = AdjointSensOp
    else:
        raise ValueError(
            'Unknown sensitivity solver: Use "Forward" or "Adjoint"')

    model = gen_model(ode_op, ode_model)
    pyro.set_rng_seed(seed)
    pyro.clear_param_store()

    if method == 'VI':
        guide = AutoMultivariateNormal(model, init_scale=init_scale)
        optim = AdagradRMSProp({"eta": lr})
        if num_particles == 1:
            svi = SVI(model, guide, optim, loss=Trace_ELBO())
        else:
            svi = SVI(model, guide, optim,
                      loss=Trace_ELBO(num_particles=num_particles,
                                      vectorize_particles=True))
        loss_trace = []
        t0 = timer.time()
        for j in range(iterations):
            loss = svi.step(torch_data)
            loss_trace.append(loss)
            if j % 500 == 0:
                print("[iteration %04d] loss: %.4f"
                      % (j + 1, np.mean(loss_trace[max(0, j - 1000):j + 1])))
        t1 = timer.time()
        print('VI time: ', t1 - t0)
        predictive = Predictive(model, guide=guide, num_samples=num_samples,
                                return_sites=return_sites)  # e.g. "ode_params", "scale"
        vb_samples = predictive(torch_data)
        return vb_samples
    elif method == 'NUTS':
        nuts_kernel = NUTS(model, adapt_step_size=True,
                           init_strategy=init_to_median)
        mcmc = MCMC(nuts_kernel, num_samples=iterations,
                    warmup_steps=warmup_steps, num_chains=2)
        t0 = timer.time()
        mcmc.run(torch_data)
        t1 = timer.time()
        print('NUTS time: ', t1 - t0)
        hmc_samples = {k: v.detach().cpu().numpy()
                       for k, v in mcmc.get_samples().items()}
        return hmc_samples
    else:
        raise ValueError('Unknown method: Use "NUTS" or "VI"')
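# A hedged usage sketch (my_gen_model and my_ode_model are placeholders, not
# names from the source): gen_model is a factory producing a Pyro model from
# (ode_op, ode_model), and ode_model must be one of the four sensitivity
# wrappers checked above.
#
#   vb_samples = run_inference(data, my_gen_model, my_ode_model, 'VI',
#                              iterations=2000, num_particles=4)
#   hmc_samples = run_inference(data, my_gen_model, my_ode_model, 'NUTS',
#                               iterations=1000, warmup_steps=500)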
def test_init_strategy_smoke(init_strategy):
    def model():
        pyro.sample("x", dist.LogNormal(0, 1))

    kernel = NUTS(model, init_strategy=init_strategy)
    kernel.setup(warmup_steps=10)
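# A possible pytest parametrization for the smoke test above (a sketch, not
# from the source). The strategies listed are the standard initializers
# exposed by pyro.infer.autoguide.initialization.
import pytest
from pyro.infer.autoguide.initialization import (
    init_to_mean, init_to_median, init_to_sample, init_to_uniform)


@pytest.mark.parametrize("init_strategy",
                         [init_to_mean, init_to_median,
                          init_to_sample, init_to_uniform])
def test_init_strategy_smoke_parametrized(init_strategy):
    test_init_strategy_smoke(init_strategy)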
def localnews(INFERENCE):
    device = torch.device('cpu')
    torch.set_default_tensor_type(torch.DoubleTensor)
    if torch.cuda.is_available():
        device = torch.device('cuda')
        torch.set_default_tensor_type(torch.cuda.DoubleTensor)

    # preprocess data
    data = pd.read_csv("data/localnews.csv", index_col=[0])
    N = data.station_id.unique().shape[0]
    data.date = data.date.apply(
        lambda x: datetime.datetime.strptime(x, '%m/%d/%Y').date())
    # data = data[(data.date <= datetime.date(2017, 9, 5))
    #             & (data.date >= datetime.date(2017, 8, 25))]
    # data = data[data.station_id.isin([1345, 3930])]

    ds = data.t.to_numpy().reshape((-1, 1))
    ohe = LabelEncoder()  # a LabelEncoder is used here, not a OneHotEncoder
    X = data.drop(columns=[
        "station_id", "date", "national_politics", "sinclair2017",
        "post", "affiliation", "callsign", "t"
    ]).to_numpy().reshape(-1, )  # also candidates: "weekday", "affiliation", "callsign"
    Group = data.sinclair2017.to_numpy().reshape(-1, 1)
    ohe.fit(X)
    X = ohe.transform(X)

    station_le = LabelEncoder()
    ids = data.station_id.to_numpy().reshape(-1, )
    station_le.fit(ids)
    ids = station_le.transform(ids)

    # weekday/day/unit effects and time trend
    X = np.concatenate((X.reshape(-1, 1), ds, ids.reshape(-1, 1), Group, ds),
                       axis=1)
    # numbers of dummies for each effect
    X_max_v = [np.max(X[:, i]).astype(int) for i in range(X.shape[1] - 2)]
    Y = data.national_politics.to_numpy()
    T0 = data[data.date == datetime.date(2017, 9, 1)].t.to_numpy()[0]

    train_condition = (data.post != 1) | (data.sinclair2017 != 1)
    # Note: the legacy torch.Tensor() constructor does not accept a device
    # kwarg for array data; build on CPU and move with .to(device) below.
    train_x = torch.Tensor(X[train_condition]).double()
    train_y = torch.Tensor(Y[train_condition]).double()
    idx = data.sinclair2017.to_numpy()
    train_g = torch.from_numpy(idx[train_condition]).to(device)
    test_x = torch.Tensor(X).double()
    test_y = torch.Tensor(Y).double()
    test_g = torch.from_numpy(idx)

    # define likelihood
    noise_prior = gpytorch.priors.GammaPrior(concentration=1, rate=10)
    likelihood = gpytorch.likelihoods.GaussianLikelihood(
        noise_prior=noise_prior if "MAP" in INFERENCE else None,
        noise_constraint=gpytorch.constraints.Positive())

    model = MultitaskGPModel(test_x, test_y, X_max_v, likelihood,
                             MAP="MAP" in INFERENCE)
    model.drift_t_module.T0 = T0
    model2 = MultitaskGPModel(train_x, train_y, X_max_v, likelihood,
                              MAP="MAP" in INFERENCE)
    # model2 = MultitaskGPModel(test_x, test_y, X_max_v, likelihood2, MAP="MAP" in INFERENCE)
    # model2.drift_t_module.T0 = T0
    model2.double()

    # group effects
    # model.x_covar_module[0].c2 = torch.var(train_y)
    # model.x_covar_module[0].raw_c2.requires_grad = False

    # weekday/day/unit effects initialized to 0.05**2
    for i in range(len(X_max_v)):
        model.x_covar_module[i].c2 = torch.tensor(0.05**2)
    # fix unit mean/variance by not requiring grad
    model.x_covar_module[-1].raw_c2.requires_grad = False
    # model.unit_mean_module.constant.data.fill_(0.12)
    # model.unit_mean_module.constant.requires_grad = False
    model.group_mean_module.constantvector.data[0].fill_(0.11)
    model.group_mean_module.constantvector.data[1].fill_(0.12)

    # set precision to double tensors
    torch.set_default_tensor_type(torch.DoubleTensor)
    train_x, train_y = train_x.to(device), train_y.to(device)
    test_x, test_y = test_x.to(device), test_y.to(device)
    model.to(device)
    likelihood.to(device)

    # define loss for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
    if torch.cuda.is_available():
        train_x = train_x.cuda()
        train_y = train_y.cuda()
        model = model.cuda()
        likelihood = likelihood.cuda()

    if not os.path.isdir("results"):
        os.mkdir("results")

    transforms = {
        'group_index_module.raw_rho':
            model.group_index_module.raw_rho_constraint.transform,
        'group_t_covar_module.base_kernel.raw_lengthscale':
            model.group_t_covar_module.base_kernel.raw_lengthscale_constraint.transform,
        'group_t_covar_module.raw_outputscale':
            model.group_t_covar_module.raw_outputscale_constraint.transform,
        'unit_t_covar_module.base_kernel.raw_lengthscale':
            model.unit_t_covar_module.base_kernel.raw_lengthscale_constraint.transform,
        'unit_t_covar_module.raw_outputscale':
            model.unit_t_covar_module.raw_outputscale_constraint.transform,
        'likelihood.noise_covar.raw_noise':
            model.likelihood.noise_covar.raw_noise_constraint.transform,
        'x_covar_module.0.raw_c2':
            model.x_covar_module[0].raw_c2_constraint.transform,
        'x_covar_module.1.raw_c2':
            model.x_covar_module[1].raw_c2_constraint.transform,
        # 'x_covar_module.2.raw_c2': model.x_covar_module[2].raw_c2_constraint.transform
    }

    priors = {
        'group_index_module.raw_rho':
            pyro.distributions.Normal(0, 1.5),
        'group_t_covar_module.base_kernel.raw_lengthscale':
            pyro.distributions.Normal(30, 10).expand([1, 1]),
        'group_t_covar_module.raw_outputscale':
            pyro.distributions.Normal(-7, 1),
        'unit_t_covar_module.base_kernel.raw_lengthscale':
            pyro.distributions.Normal(30, 10).expand([1, 1]),
        'unit_t_covar_module.raw_outputscale':
            pyro.distributions.Normal(-7, 1),
        'likelihood.noise_covar.raw_noise':
            pyro.distributions.Normal(-7, 1).expand([1]),
        'x_covar_module.0.raw_c2':
            pyro.distributions.Normal(-7, 1).expand([1]),
        'x_covar_module.1.raw_c2':
            pyro.distributions.Normal(-7, 1).expand([1]),
        # 'model.x_covar_module.2.raw_c2': pyro.distributions.Normal(-6, 1).expand([1])
    }
    # plot_pyro_prior(priors, transforms)

    def pyro_model(x, y):
        fn = pyro.random_module("model", model, prior=priors)
        sampled_model = fn()
        output = sampled_model.likelihood(sampled_model(x))
        pyro.sample("obs", output, obs=y)

    if INFERENCE == 'MCMCLOAD':
        with open('results/localnews_MCMC.pkl', 'rb') as f:
            mcmc_run = pickle.load(f)
        mcmc_samples = mcmc_run.get_samples()
        print(mcmc_run.summary())
        plot_pyro_posterior(mcmc_samples, transforms)
        # plot_posterior(mcmc_samples)
        return
        # NOTE: unreachable as written (early return above); kept from the source.
        for k, d in mcmc_samples.items():
            mcmc_samples[k] = d[idx]
        model.pyro_load_from_samples(mcmc_samples)
        visualize_localnews_MCMC(data, train_x, train_y, train_g,
                                 test_x, test_y, test_g, model,
                                 likelihood, T0, station_le, 10)
        return
    elif INFERENCE == 'MAP':
        model.group_index_module._set_rho(0.0)
        model.group_t_covar_module.outputscale = 0.05**2
        model.group_t_covar_module.base_kernel.lengthscale = 15
        model.likelihood.noise_covar.noise = 0.05**2
        model.unit_t_covar_module.outputscale = 0.05**2
        model.unit_t_covar_module.base_kernel.lengthscale = 30

        # weekday/day/unit effects initialized to 0.05**2
        for i in range(len(X_max_v)):
            model.x_covar_module[i].c2 = torch.tensor(0.05**2)
        for name, param in model.drift_t_module.named_parameters():
            param.requires_grad = True
        model.drift_t_module._set_T1(0.0)
        model.drift_t_module._set_T2(5.0)
        model.drift_t_module.base_kernel.lengthscale = 30.0
        model.drift_t_module.outputscale = 0.05**2
        # model.drift_t_module.raw_T1.requires_grad = False
        # model.drift_t_module.raw_T2.requires_grad = False

        optimizer = torch.optim.LBFGS(model.parameters(), lr=0.1,
                                      history_size=10, max_iter=4)
        model, likelihood = train(test_x, test_y, model, likelihood, mll,
                                  optimizer, training_iterations)
        torch.save(model.state_dict(),
                   'results/localnews_' + INFERENCE + '_model_state.pth')
        return
    elif INFERENCE == 'MCMC':
        model.group_index_module._set_rho(0.9)
        model.group_t_covar_module.outputscale = 0.02**2
        model.group_t_covar_module.base_kernel._set_lengthscale(3)
        model.likelihood.noise_covar.noise = 0.03**2
        model.unit_t_covar_module.outputscale = 0.02**2
        model.unit_t_covar_module.base_kernel._set_lengthscale(30)

        # weekday/day effects initialized to 0.01**2
        for i in range(len(X_max_v) - 1):
            model.x_covar_module[i].c2 = torch.tensor(0.01**2)
            # model.x_covar_module[i].raw_c2.requires_grad = False

        initial_params = {
            'group_index_module.rho_prior':
                model.group_index_module.raw_rho.detach(),
            'group_t_covar_module.base_kernel.lengthscale_prior':
                model.group_t_covar_module.base_kernel.raw_lengthscale.detach(),
            'group_t_covar_module.outputscale_prior':
                model.group_t_covar_module.raw_outputscale.detach(),
            'unit_t_covar_module.base_kernel.lengthscale_prior':
                model.unit_t_covar_module.base_kernel.raw_lengthscale.detach(),
            'unit_t_covar_module.outputscale_prior':
                model.unit_t_covar_module.raw_outputscale.detach(),
            'likelihood.noise_covar.noise_prior':
                model.likelihood.raw_noise.detach(),
            'x_covar_module.0.c2_prior':
                model.x_covar_module[0].raw_c2.detach(),
            'x_covar_module.1.c2_prior':
                model.x_covar_module[1].raw_c2.detach(),
        }
        with gpytorch.settings.fast_computations(
                covar_root_decomposition=False, log_prob=False, solves=False):
            nuts_kernel = NUTS(
                pyro_model, adapt_step_size=True, adapt_mass_matrix=True,
                jit_compile=False,
                init_strategy=pyro.infer.autoguide.initialization.init_to_value(
                    values=initial_params))
            # alternative kernel, defined but unused below
            hmc_kernel = HMC(
                pyro_model, step_size=0.1, num_steps=10, adapt_step_size=True,
                init_strategy=pyro.infer.autoguide.initialization.init_to_mean())
            mcmc_run = MCMC(nuts_kernel, num_samples=num_samples,
                            warmup_steps=warmup_steps)
            mcmc_run.run(train_x, train_y)
            pickle.dump(mcmc_run, open("results/localnews_MCMC.pkl", "wb"))
            # plot_pyro_posterior(mcmc_run.get_samples(), transforms)
            return
        # NOTE: unreachable as written (early return above); kept from the source.
        visualize_localnews_MCMC(data, train_x, train_y, train_g,
                                 test_x, test_y, test_g, model,
                                 likelihood, T0, station_le, num_samples)
    else:
        model.load_strict_shapes(False)
        state_dict = torch.load('results/localnews_MAP_model_state.pth')
        model.load_state_dict(state_dict)
        model2.load_state_dict(state_dict)
        print(f'Parameter name: rho value = '
              f'{model.group_index_module.rho.detach().numpy()}')
        # print(f'Parameter name: unit mean value = {model.unit_mean_module.constant.detach().numpy()}')
        print(f'Parameter name: group ls value = '
              f'{model.group_t_covar_module.base_kernel.lengthscale.detach().numpy()}')
        print(f'Parameter name: group os value = '
              f'{np.sqrt(model.group_t_covar_module.outputscale.detach().numpy())}')
        print(f'Parameter name: unit ls value = '
              f'{model.unit_t_covar_module.base_kernel.lengthscale.detach().numpy()}')
        print(f'Parameter name: unit os value = '
              f'{np.sqrt(model.unit_t_covar_module.outputscale.detach().numpy())}')
        print(f'Parameter name: noise value = '
              f'{np.sqrt(model.likelihood.noise.detach().numpy())}')
        print(f'Parameter name: weekday std value = '
              f'{np.sqrt(model.x_covar_module[0].c2.detach().numpy())}')
        print(f'Parameter name: day std value = '
              f'{np.sqrt(model.x_covar_module[1].c2.detach().numpy())}')
        print(f'Parameter name: unit std value = '
              f'{np.sqrt(model.x_covar_module[2].c2.detach().numpy())}')
        print(f'Parameter name: drift ls value = '
              f'{model.drift_t_module.base_kernel.lengthscale.detach().numpy()}')
        print(f'Parameter name: drift cov os value = '
              f'{np.sqrt(model.drift_t_module.outputscale.detach().numpy())}')
        print(f'Parameter name: drift cov T1 value = '
              f'{model.drift_t_module.T1.detach().numpy()}')
        print(f'Parameter name: drift cov T2 value = '
              f'{model.drift_t_module.T2.detach().numpy()}')
        visualize_localnews(data, test_x, test_y, test_g, model, model2,
                            likelihood, T0, station_le, train_condition)
        irt_model,
        model_args=(args.ability_dim, num_person, num_item, device,
                    response, mask, 1),
        num_chains=args.num_chains,
    )
    start_time = time.time()
    nuts_kernel = NUTS(potential_fn=potential_fn)
    mcmc = MCMC(
        nuts_kernel,
        num_samples=args.num_samples,
        warmup_steps=args.num_warmup,
        num_chains=args.num_chains,
        initial_params=init_params,
        transforms=transforms,
    )
    mcmc.run(
        args.ability_dim,
        num_person,
        num_item,
        device,
        response,
        mask,
# torch.prod((fs > 0) == torch.ByteTensor(Y).squeeze())
# conditioned_model = pyro.condition(model, data={"ys": y_tensor})
conditioned_model = pyro.condition(model, data={})
thing = model()
thing.shape
# fs.shape
# likelihood.sample()

# def model(data):
# nuts_kernel = NUTS(model, adapt_step_size=True)
nuts_kernel = NUTS(conditioned_model, adapt_step_size=True)
mcmc_run = MCMC(nuts_kernel, num_samples=500, warmup_steps=300).run()
posterior = pyro.infer.abstract_infer.EmpiricalMarginal(mcmc_run, 'fs')

from pyro.infer.abstract_infer import EmpiricalMarginal
import pyro.distributions as dist

true_coefs = torch.tensor([1., 2., 3.])
data = torch.randn(2000, 3)
dim = 3
labels = dist.Bernoulli(logits=(true_coefs * data).sum(-1)).sample()


def model(data):
    trgts.append(trgt)

    inpts = torch.cat(inpts).cuda()   # [:25000]
    trgts = torch.cat(trgts).cuda()   # [:25000]
    batch_size = 100                  # args.batch_size
    num_batches = inpts.shape[0] // batch_size
    print([num_batches, batch_size, *inpts.shape[1:]])
    inpts = inpts.reshape([num_batches, batch_size, *inpts.shape[1:]])
    trgts = trgts.reshape([num_batches, batch_size])
    print("Inputs:", inpts.shape)
    print("Targets:", trgts.shape)

    printf, logfile = utils.get_logging_print(
        os.path.join(args.dir, args.log_fname + '-%s.txt'))
    print('Saving logs to: %s' % logfile)

    nuts_kernel = NUTS(pyro_model.model, step_size=10.)
    num_samples = 30
    # x_, y_ = loaders["train"].dataset.tensors
    mcmc_run = MCMC(nuts_kernel, num_samples=num_samples,
                    warmup_steps=10).run(inpts, trgts)
    # mcmc_run = MCMC(nuts_kernel, num_samples=num_samples,
    #                 warmup_steps=100).run(islice(loaders["train"], 1000))
    samples = torch.cat(
        list(mcmc_run.marginal(sites="t").support(flatten=True).values()),
        dim=-1)
    print(samples)

    utils.save_checkpoint(args.dir, 0, name='nuts',
                          state_dict=pyro_model.state_dict())
        key: val.long().to(param.get("device"))
        for key, val in dummybatch.items()
    }

    if param['model_type'] == "rnn":
        model = models.RNN_Model(
            **param,
            item_group=torch.tensor(itemattr['category']).long())
    elif param['model_type'] == "ar1":
        model = models.AR_Model(
            **param,
            item_group=torch.tensor(itemattr['category']).long())

#%%
from pyro.infer.mcmc import NUTS, MCMC

hmc_kernel = NUTS(model, jit_compile=True)  # note: a NUTS kernel, despite the name
init_par = {
    key: val
    for key, val in model.par_real.items()
    if key not in ['softmax_mult', "h0"]
}
init_par['h0-batch'] = model.par_real['h0']
mcmc = MCMC(hmc_kernel,
            num_samples=10000,
            warmup_steps=100,
            initial_params=init_par)

#%%
all_data = dataloaders['train'].dataset.data
all_data['phase_mask'] = all_data['mask_train']

#%% TRAIN and save
TRAIN = False
    # fashion_mnist labels
    # [0: 'T-shirt', 1: 'Trouser', 2: 'Pullover', 3: 'Dress', 4: 'Coat',
    #  5: 'Sandal', 6: 'Shirt', 7: 'Sneaker', 8: 'Bag', 9: 'Ankle boot']
    parser = argparse.ArgumentParser()
    parser.add_argument('--num-samples', type=int, default=1000)
    parser.add_argument('--num-warmups', type=int, default=1000)
    parser.add_argument('--num-chains', type=int, default=1)
    parser.add_argument('--overwrite', action='store_true')
    parser.add_argument('--nn-model')
    parser.add_argument('--dataset')
    parser.add_argument('--type')
    args = parser.parse_args()

    nn_model, program, run_args, save_fn = setting(args)
    nuts = NUTS(program)
    mcmc = MCMC(nuts,
                num_samples=args.num_samples,
                warmup_steps=args.num_warmups,
                num_chains=args.num_chains)
    mcmc.run(nn_model, *run_args)
    zs = mcmc.get_samples()['z'].detach().cpu().numpy()
    np.savetxt(save_fn, zs)

    # setting(args)
    # nuts = NUTS(program)
    # mcmc = MCMC(nuts, 2000)
    # mcmc.run(nn_model, target)
    # zs = mcmc.get_samples()['z'].detach().cpu().numpy()
    # np.savetxt(save_fn, zs)
def _mcmc_trainer(self, step_size=0.1, num_samples=1000, warmup_steps=100):
    mcmc_kernel = NUTS(self.model, step_size=step_size)
    self.mcmc = MCMC(mcmc_kernel,
                     num_samples=num_samples,
                     warmup_steps=warmup_steps)
    self.mcmc.run(self.X)
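# A hedged companion sketch (not from the source): once _mcmc_trainer has run,
# the posterior draws live on the stored MCMC object. The optional `site`
# argument is a placeholder for whatever self.model actually samples.
def _posterior_samples(self, site=None):
    samples = self.mcmc.get_samples()
    return samples if site is None else samples[site]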
def run_inference(
    pyro_model: Callable,
    X: Tensor,
    Y: Tensor,
    Yvar: Tensor,
    num_samples: int = 512,
    warmup_steps: int = 1024,
    thinning: int = 16,
    use_input_warping: bool = False,
    max_tree_depth: int = 6,
    disable_progbar: bool = False,
    gp_kernel: str = "matern",
    verbose: bool = False,
    task_feature: Optional[int] = None,
    rank: Optional[int] = None,
) -> Dict[str, Tensor]:
    start = time.time()
    try:
        from pyro.infer.mcmc import NUTS, MCMC
        from pyro.infer.mcmc.util import print_summary
    except ImportError:  # pragma: no cover
        raise RuntimeError("Cannot call run_inference without pyro installed!")
    kernel = NUTS(
        pyro_model,
        jit_compile=True,
        full_mass=True,
        ignore_jit_warnings=True,
        max_tree_depth=max_tree_depth,
    )
    mcmc = MCMC(
        kernel,
        warmup_steps=warmup_steps,
        num_samples=num_samples,
        disable_progbar=disable_progbar,
    )
    mcmc.run(
        X,
        Y,
        Yvar,
        use_input_warping=use_input_warping,
        gp_kernel=gp_kernel,
        task_feature=task_feature,
        rank=rank,
    )
    # compute the true lengthscales and get rid of the temporary variables
    samples = mcmc.get_samples()
    inv_length_sq = (samples["kernel_tausq"].unsqueeze(-1)
                     * samples["_kernel_inv_length_sq"])
    samples["lengthscale"] = (1.0 / inv_length_sq).sqrt()  # pyre-ignore [16]
    del samples["kernel_tausq"], samples["_kernel_inv_length_sq"]
    # print the summary if requested
    if verbose:
        orig_std_out = sys.stdout.write
        sys.stdout.write = logger.info
        print_summary(samples, prob=0.9, group_by_chain=False)
        sys.stdout.write = orig_std_out
        logger.info(f"MCMC elapsed time: {time.time() - start}")
    # apply thinning to decorrelate the draws
    for k, v in samples.items():
        samples[k] = v[::thinning]
    return samples
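# A minimal usage sketch (assumed, not from the source): my_pyro_gp_model is a
# hypothetical Pyro model whose sample sites include "kernel_tausq" and
# "_kernel_inv_length_sq", since run_inference post-processes those two sites.
import torch

X = torch.rand(20, 3)
Y = torch.randn(20, 1)
Yvar = torch.full_like(Y, 1e-4)
samples = run_inference(my_pyro_gp_model,  # hypothetical model, see note above
                        X, Y, Yvar,
                        num_samples=64, warmup_steps=128, thinning=4)
print({k: v.shape for k, v in samples.items()})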
def main(args):
    baseball_dataset = pd.read_csv(DATA_URL, sep="\t")
    train, _, player_names = train_test_split(baseball_dataset)
    at_bats, hits = train[:, 0], train[:, 1]
    logging.info("Original Dataset:")
    logging.info(baseball_dataset)

    # (1) Full Pooling Model
    init_params, potential_fn, transforms, _ = initialize_model(
        fully_pooled, model_args=(at_bats, hits), num_chains=args.num_chains)
    nuts_kernel = NUTS(potential_fn=potential_fn)
    mcmc = MCMC(nuts_kernel,
                num_samples=args.num_samples,
                warmup_steps=args.warmup_steps,
                num_chains=args.num_chains,
                initial_params=init_params,
                transforms=transforms)
    mcmc.run(at_bats, hits)
    diagnostics = mcmc.diagnostics()
    samples_fully_pooled = mcmc.get_samples()
    logging.info("\nModel: Fully Pooled")
    logging.info("===================")
    logging.info("\nphi:")
    logging.info(summary(samples_fully_pooled, sites=["phi"],
                         player_names=player_names,
                         diagnostics=diagnostics)["phi"])
    num_divergences = sum(map(len, diagnostics["divergences"].values()))
    logging.info("\nNumber of divergent transitions: {}\n".format(num_divergences))
    sample_posterior_predictive(fully_pooled, samples_fully_pooled,
                                baseball_dataset)
    evaluate_log_posterior_density(fully_pooled, samples_fully_pooled,
                                   baseball_dataset)

    # (2) No Pooling Model
    init_params, potential_fn, transforms, _ = initialize_model(
        not_pooled, model_args=(at_bats, hits), num_chains=args.num_chains)
    nuts_kernel = NUTS(potential_fn=potential_fn)
    mcmc = MCMC(nuts_kernel,
                num_samples=args.num_samples,
                warmup_steps=args.warmup_steps,
                num_chains=args.num_chains,
                initial_params=init_params,
                transforms=transforms)
    mcmc.run(at_bats, hits)
    diagnostics = mcmc.diagnostics()
    samples_not_pooled = mcmc.get_samples()
    logging.info("\nModel: Not Pooled")
    logging.info("=================")
    logging.info("\nphi:")
    logging.info(summary(samples_not_pooled, sites=["phi"],
                         player_names=player_names,
                         diagnostics=diagnostics)["phi"])
    num_divergences = sum(map(len, diagnostics["divergences"].values()))
    logging.info("\nNumber of divergent transitions: {}\n".format(num_divergences))
    sample_posterior_predictive(not_pooled, samples_not_pooled,
                                baseball_dataset)
    evaluate_log_posterior_density(not_pooled, samples_not_pooled,
                                   baseball_dataset)

    # (3) Partially Pooled Model
    init_params, potential_fn, transforms, _ = initialize_model(
        partially_pooled, model_args=(at_bats, hits),
        num_chains=args.num_chains)
    nuts_kernel = NUTS(potential_fn=potential_fn)
    mcmc = MCMC(nuts_kernel,
                num_samples=args.num_samples,
                warmup_steps=args.warmup_steps,
                num_chains=args.num_chains,
                initial_params=init_params,
                transforms=transforms)
    mcmc.run(at_bats, hits)
    diagnostics = mcmc.diagnostics()
    samples_partially_pooled = mcmc.get_samples()
    logging.info("\nModel: Partially Pooled")
    logging.info("=======================")
    logging.info("\nphi:")
    logging.info(summary(samples_partially_pooled, sites=["phi"],
                         player_names=player_names,
                         diagnostics=diagnostics)["phi"])
    num_divergences = sum(map(len, diagnostics["divergences"].values()))
    logging.info("\nNumber of divergent transitions: {}\n".format(num_divergences))
    sample_posterior_predictive(partially_pooled, samples_partially_pooled,
                                baseball_dataset)
    evaluate_log_posterior_density(partially_pooled, samples_partially_pooled,
                                   baseball_dataset)

    # (4) Partially Pooled with Logit Model
    init_params, potential_fn, transforms, _ = initialize_model(
        partially_pooled_with_logit, model_args=(at_bats, hits),
        num_chains=args.num_chains)
    nuts_kernel = NUTS(potential_fn=potential_fn, transforms=transforms)
    mcmc = MCMC(nuts_kernel,
                num_samples=args.num_samples,
                warmup_steps=args.warmup_steps,
                num_chains=args.num_chains,
                initial_params=init_params,
                transforms=transforms)
    mcmc.run(at_bats, hits)
    diagnostics = mcmc.diagnostics()
    samples_partially_pooled_logit = mcmc.get_samples()
    logging.info("\nModel: Partially Pooled with Logit")
    logging.info("==================================")
    logging.info("\nSigmoid(alpha):")
    logging.info(summary(samples_partially_pooled_logit, sites=["alpha"],
                         player_names=player_names,
                         transforms={"alpha": torch.sigmoid},
                         diagnostics=diagnostics)["alpha"])
    num_divergences = sum(map(len, diagnostics["divergences"].values()))
    logging.info("\nNumber of divergent transitions: {}\n".format(num_divergences))
    sample_posterior_predictive(partially_pooled_with_logit,
                                samples_partially_pooled_logit,
                                baseball_dataset)
    evaluate_log_posterior_density(partially_pooled_with_logit,
                                   samples_partially_pooled_logit,
                                   baseball_dataset)
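# A hedged entry-point sketch (the original fragment does not show its argument
# parser; the flag names simply mirror the attributes used in main above):
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description="baseball models with NUTS")
    parser.add_argument("--num-samples", type=int, default=200)
    parser.add_argument("--warmup-steps", type=int, default=100)
    parser.add_argument("--num-chains", type=int, default=1)
    main(parser.parse_args())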
def run_mcmc(data):
    kernel = NUTS(model)
    mcmc = MCMC(kernel, num_samples=250, warmup_steps=50)
    mcmc.run(data)
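# A hedged variant (an assumption, not from the source): run_mcmc above drops
# the MCMC handle on return, so a caller that needs the draws would keep it.
def run_mcmc_and_get_samples(data):
    kernel = NUTS(model)
    mcmc = MCMC(kernel, num_samples=250, warmup_steps=50)
    mcmc.run(data)
    return mcmc.get_samples()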
def localnews(INFERENCE):
    with open('model/conf.json') as f:
        configs = json.load(f)
    sigma_noise = configs["sigma_noise"]

    data = pd.read_csv("data/localnews.csv", index_col=[0])
    # data.national_politics = np.log(data.national_politics / (1 - data.national_politics))
    data.date = data.date.apply(
        lambda x: datetime.datetime.strptime(x, '%m/%d/%Y').date())
    # data = data.sort_values(by=['date'])
    # data = data[(data.date <= datetime.date(2017, 9, 10))
    #             & (data.date >= datetime.date(2017, 8, 20))]
    # data = data[data.station_id.isin([1345, 1350])]
    N = data.station_id.unique().shape[0]

    date_le = LabelEncoder()
    ds = data.date
    date_le.fit(ds)
    ds = date_le.transform(ds).reshape((-1, 1))

    ohe = OneHotEncoder()
    X = data.drop(columns=["station_id", "date", "national_politics",
                           "sinclair2017", "post", "weekday", "affiliation",
                           "callsign"])
    ohe.fit(X)
    X = ohe.transform(X).toarray()

    station_le = LabelEncoder()
    ids = data.station_id
    station_le.fit(ids)
    ids = station_le.transform(ids)

    X = np.concatenate((X, ds), axis=1)
    Y = data.national_politics.to_numpy()
    T0 = date_le.transform(np.array([datetime.date(2017, 9, 1)]))

    train_condition = (data.post != 1) | (data.sinclair2017 != 1)
    train_x = torch.Tensor(X[train_condition]).double()
    train_y = torch.Tensor(Y[train_condition]).double()
    idx = data.sinclair2017.to_numpy()
    train_i = torch.from_numpy(idx[train_condition])
    test_x = torch.Tensor(X).double()
    test_y = torch.Tensor(Y).double()
    test_i = torch.from_numpy(idx)

    # fit = TwoWayFixedEffectModel(X_tr, X_co, Y_tr, Y_co, ATT, T0)
    # return

    noise_prior = gpytorch.priors.GammaPrior(concentration=1, rate=2)
    likelihood = gpytorch.likelihoods.GaussianLikelihood(
        noise_prior=noise_prior,
        noise_constraint=gpytorch.constraints.Positive())
    model = MultitaskGPModel((train_x, train_i), train_y, N, likelihood)

    # fix some parameters
    model.c_covar_module._set_c2(torch.var(train_y))
    # model.mean_module[0].constant.data.fill_(torch.mean(train_y[train_i == 0]).double())
    # model.mean_module[1].constant.data.fill_(torch.mean(train_y[train_i == 1]).double())
    # model.mean_module[0].constant.requires_grad = False
    # model.mean_module[1].constant.requires_grad = False
    model.c_covar_module.raw_c2.requires_grad = False
    model.i_mean_module.bias.data.fill_(
        torch.mean(train_y[train_i == 0]).double())
    slope = (torch.mean(train_y[train_i == 1]).double()
             - torch.mean(train_y[train_i == 0]).double())
    model.i_mean_module.weights.data.fill_(slope)
    model.i_mean_module.weights.requires_grad = False
    model.i_mean_module.bias.requires_grad = False
    model.double()
    likelihood.double()

    # plot_prior(model)
    # visualize_localnews(data, train_x, train_y, train_i, test_x, test_y,
    #                     test_i, model, likelihood, T0, date_le, station_le)

    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    def pyro_model(x, i, y):
        model.pyro_sample_from_prior()
        output = model(x, i)
        loss = mll.pyro_factor(output, y)
        return y

    # B = model.task_covar_module.base_kernel.covar_factor
    # v = model.task_covar_module.base_kernel.raw_var
    # print(torch.matmul(B, B.T) + torch.exp(v))

    if not os.path.isdir("results"):
        os.mkdir("results")

    if INFERENCE == 'MCMCLOAD':
        # model.load_strict_shapes(False)
        # state_dict = torch.load('results/localnews_MCMC_model_state.pth')
        # model.load_state_dict(state_dict)
        with open('results/localnews_MCMC.pkl', 'rb') as f:
            mcmc_run = pickle.load(f)
        mcmc_samples = mcmc_run.get_samples()
        model.pyro_load_from_samples(mcmc_samples)
        plot_posterior(mcmc_samples)
        visualize_localnews_MCMC(data, train_x, train_y, train_i,
                                 test_x, test_y, test_i, model,
                                 likelihood, T0, date_le, station_le,
                                 num_samples)
    elif INFERENCE == 'MAP':
        model.task_covar_module._set_rho(0.0)
        model.t_covar_module.outputscale = 0.05**2
        model.t_covar_module.base_kernel.lengthscale = 14
        model.likelihood.noise_covar.noise = 0.05**2
        # Adam includes the GaussianLikelihood parameters
        optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
        model, likelihood = train(train_x, train_i, train_y, model,
                                  likelihood, mll, optimizer)
        torch.save(model.state_dict(),
                   'results/localnews_' + INFERENCE + '_model_state.pth')
    elif INFERENCE == 'MCMC':
        nuts_kernel = NUTS(pyro_model, adapt_step_size=True)
        mcmc_run = MCMC(nuts_kernel, num_samples=num_samples,
                        warmup_steps=warmup_steps,
                        disable_progbar=smoke_test)
        mcmc_run.run(train_x, train_i, train_y)
        # save the posterior
        # with open('results/localnews_' + INFERENCE + '.pkl', 'wb') as f:
        #     pickle.dump(mcmc_run.get_samples(), f)
        pickle.dump(mcmc_run, open("results/localnews_MCMC.pkl", "wb"))
        torch.save(model.state_dict(),
                   'results/localnews_' + INFERENCE + '_model_state.pth')
    else:
        model.load_strict_shapes(False)
        state_dict = torch.load('results/localnews_MAP_model_state.pth')
        model.load_state_dict(state_dict)
        for param_name, param in model.named_parameters():
            print(f'Parameter name: {param_name:42} value = '
                  f'{param.detach().numpy()}')
        visualize_localnews(data, train_x, train_y, train_i,
                            test_x, test_y, test_i, model,
                            likelihood, T0, date_le, station_le)
def run(
    task: Task,
    num_samples: int,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    num_chains: int = 10,
    num_warmup: int = 10000,
    kernel: str = "slice",
    kernel_parameters: Optional[Dict[str, Any]] = None,
    thinning: int = 1,
    diagnostics: bool = True,
    available_cpu: int = 1,
    mp_context: str = "fork",
    jit_compile: bool = False,
    automatic_transforms_enabled: bool = True,
    initial_params: Optional[torch.Tensor] = None,
    **kwargs: Any,
) -> torch.Tensor:
    """Runs MCMC using Pyro on a potential function

    Produces `num_samples` while accounting for warmup (burn-in) and thinning.

    Note that the actual number of simulations is not controlled for with MCMC
    since algorithms are only used as a reference method in the benchmark.

    MCMC is run on the potential function, which returns the unnormalized
    negative log posterior probability. Note that this requires a tractable
    likelihood. Pyro is used to automatically construct the potential function.

    Args:
        task: Task instance
        num_samples: Number of samples to generate from posterior
        num_observation: Observation number to load, alternative to `observation`
        observation: Observation, alternative to `num_observation`
        num_chains: Number of chains
        num_warmup: Warmup steps, during which parameters of the sampler are
            adapted. Warmup samples are not returned by the algorithm.
        kernel: HMC, NUTS, or Slice
        kernel_parameters: Parameters passed to kernel
        thinning: Amount of thinning to apply, in order to avoid drawing
            correlated samples from the chain
        diagnostics: Flag for diagnostics
        available_cpu: Number of CPUs used to parallelize chains
        mp_context: multiprocessing context, only fork might work
        jit_compile: Just-in-time (JIT) compilation, can yield significant speed ups
        automatic_transforms_enabled: Whether or not to use automatic transforms
        initial_params: Parameters to initialize at

    Returns:
        Samples from posterior
    """
    assert not (num_observation is None and observation is None)
    assert not (num_observation is not None and observation is not None)

    tic = time.time()
    log = sbibm.get_logger(__name__)

    hook_fn = None
    if diagnostics:
        log.info(f"MCMC sampling for observation {num_observation}")
        tb_writer, tb_close = tb_make_writer(
            logger=log,
            basepath=f"tensorboard/pyro_{kernel.lower()}/observation_{num_observation}",
        )
        hook_fn = tb_make_hook_fn(tb_writer)

    if "num_simulations" in kwargs:
        warnings.warn(
            "`num_simulations` was passed as a keyword but will be ignored, "
            "see docstring for more info."
        )

    # Prepare model and transforms
    conditioned_model = task._get_pyro_model(num_observation=num_observation,
                                             observation=observation)
    transforms = task._get_transforms(
        num_observation=num_observation,
        observation=observation,
        automatic_transforms_enabled=automatic_transforms_enabled,
    )

    kernel_parameters = kernel_parameters if kernel_parameters is not None else {}
    kernel_parameters["jit_compile"] = jit_compile
    kernel_parameters["transforms"] = transforms
    log.info("Using kernel: {name}({parameters})".format(
        name=kernel,
        parameters=",".join([f"{k}={v}" for k, v in kernel_parameters.items()]),
    ))
    if kernel.lower() == "nuts":
        mcmc_kernel = NUTS(model=conditioned_model, **kernel_parameters)
    elif kernel.lower() == "hmc":
        mcmc_kernel = HMC(model=conditioned_model, **kernel_parameters)
    elif kernel.lower() == "slice":
        mcmc_kernel = Slice(model=conditioned_model, **kernel_parameters)
    else:
        raise NotImplementedError

    if initial_params is not None:
        site_name = "parameters"
        initial_params = {site_name: transforms[site_name](initial_params)}
    else:
        initial_params = None

    mcmc_parameters = {
        "num_chains": num_chains,
        "num_samples": thinning * num_samples,
        "warmup_steps": num_warmup,
        "available_cpu": available_cpu,
        "initial_params": initial_params,
    }
    log.info("Calling MCMC with: MCMC({name}_kernel, {parameters})".format(
        name=kernel,
        parameters=",".join([f"{k}={v}" for k, v in mcmc_parameters.items()]),
    ))

    mcmc = MCMC(mcmc_kernel, hook_fn=hook_fn, **mcmc_parameters)
    mcmc.run()

    toc = time.time()
    log.info(f"Finished MCMC after {toc - tic:.3f} seconds")
    log.info(f"Automatic transforms {mcmc.transforms}")

    log.info(f"Apply thinning of {thinning}")
    mcmc._samples = {"parameters": mcmc._samples["parameters"][:, ::thinning, :]}

    num_samples_available = (mcmc._samples["parameters"].shape[0]
                             * mcmc._samples["parameters"].shape[1])
    if num_samples_available < num_samples:
        warnings.warn("Some samples will be included multiple times")
        samples = mcmc.get_samples(
            num_samples=num_samples,
            group_by_chain=False)["parameters"].squeeze()
    else:
        samples = mcmc.get_samples(group_by_chain=False)["parameters"].squeeze()
        idx = torch.randperm(samples.shape[0])[:num_samples]
        samples = samples[idx, :]

    assert samples.shape[0] == num_samples

    if diagnostics:
        mcmc.summary()
        tb_ess(tb_writer, mcmc)
        tb_r_hat(tb_writer, mcmc)
        tb_marginals(tb_writer, mcmc)
        tb_acf(tb_writer, mcmc)
        tb_posteriors(tb_writer, mcmc)
        tb_plot_posterior(tb_writer, samples, tag="posterior/final")
        tb_close()

    return samples
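# A hedged invocation sketch (task name and sampling budgets are illustrative,
# not from the source; assumes the sbibm package is importable):
if __name__ == "__main__":
    task = sbibm.get_task("gaussian_linear")
    posterior_samples = run(task, num_samples=100, num_observation=1,
                            num_chains=2, num_warmup=100, kernel="nuts")
    print(posterior_samples.shape)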
import pickle

pyro.enable_validation(True)
pyro.set_rng_seed(0)

train_dataset = ToyDataset()
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=len(train_dataset))
for step, (x, y) in enumerate(train_loader):
    x = Variable(x).cuda()  # (shape: (batch_size, 2))
    y = Variable(y).cuda()  # (shape: (batch_size, ))

    nuts_kernel = NUTS(model, jit_compile=False)
    posterior = MCMC(nuts_kernel, num_samples=1000, warmup_steps=1000,
                     num_chains=1).run(x, y)

    fc1_weight_samples = EmpiricalMarginal(
        posterior, sites=["module$$$fc1.weight"]
    )._get_samples_and_weights()[0].cpu().numpy()
    # (shape: (num_samples, 1, shape1, shape2))
    fc1_bias_samples = EmpiricalMarginal(
        posterior, sites=["module$$$fc1.bias"]
    )._get_samples_and_weights()[0].cpu().numpy()
    # (shape: (num_samples, 1, shape1))
    fc2_weight_samples = EmpiricalMarginal(
        dist.DirichletMultinomial(
            total_count=(N_total * haul_prob_patch).int(),
            concentration=concentration_subp),
        obs=torch.Tensor(data.N_ice))
    # get observed seals (includes false-positives)
    # print(N_ice.shape)
    # print(dist.Poisson(false_pos).sample().shape)
    # False_pos = pyro.sample("False_pos", dist.Poisson(false_pos))
    # det_probs = pyro.sample(
    #     'det_probs',
    #     dist.Beta(softplus(a1_det0 * torch.Tensor(data.sea_ice_subp) + b_det0),
    #               a1_det1 * torch.Tensor(data.sea_ice_subp) + b_det1))
    # pyro.sample("N_obs",
    #             dist.Binomial(N_ice + False_pos / det_probs, det_probs),
    #             obs=torch.Tensor(data.N_obs))


nuts_kernel = NUTS(model)
# One zero-initialized entry per chain for each top-level site.
initial_params = {
    'a_tot': torch.zeros([N_CHAINS]),
    'b_tot': torch.zeros([N_CHAINS]),
    'a_con': torch.zeros([N_CHAINS]),
    'b_con': torch.zeros([N_CHAINS]),
    'a_fp': torch.zeros([N_CHAINS]),
    'b_fp': torch.zeros([N_CHAINS]),
}
# Note: the original built this dict (misspelled "intial_params") but never
# passed it to MCMC; it is wired in here on the assumption that was the intent.
mcmc = MCMC(nuts_kernel,
            num_samples=1000,
            warmup_steps=200,
            num_chains=N_CHAINS,
            initial_params=initial_params)
mcmc.run(training_data)
ss.plot_eigenvalues(figsize=(6, 4))
ss.plot_sufficient_summary(x.detach().numpy(), f.detach().numpy(),
                           figsize=(6, 4))

kernel = GPy.kern.RBF(input_dim=1, ARD=True)
gp = GPy.models.GPRegression(
    ss.transform(x.detach().numpy())[0],
    f.reshape(-1, 1).detach().numpy(), kernel)
gp.optimize_restarts(5, verbose=False)

# Use the No-U-Turn Sampler (NUTS) variant of Hamiltonian Monte Carlo to
# sample from the posterior of the original model.

# plain NUTS
num_chains = 1
num_samples = 100
kernel = NUTS(model)
mcmc = MCMC(kernel,
            num_samples=num_samples,
            warmup_steps=100,
            num_chains=num_chains)
mcmc.run(f)
mcmc.summary()
mcmc_samples = mcmc.get_samples(group_by_chain=True)
print(mcmc_samples.keys())
chains = mcmc_samples["input"]
print(chains.shape)

# Show the posterior distribution of each component of the input (input_dim).
for i in range(5):
    plt.figure(figsize=(6, 4))
    sns.distplot(mcmc_samples['input'][:, :, i])
def model(utt, phi):
    a = pyro.sample("a", dist.Uniform(.1, 20.))
    b = pyro.sample("b", dist.Uniform(.1, 20.))
    mu = pyro.sample("mu", dist.Beta(a, b))
    nu = pyro.sample("nu", dist.LogNormal(2, 0.5))
    # a2/b2 are computed but unused in this version of the model
    a2 = mu * (nu + 1)
    b2 = (1 - mu) * (nu + 1)
    with pyro.plate("data"):
        pyro.sample("obs", RSASpeaker(mu, phi), obs=utt)


utt = torch.ones(200)
utt[50:200] = 0
phi = torch.rand(50) * .3
phi2 = torch.rand(150) * 0.6 + .4
phi = torch.cat([phi, phi2])

nuts_kernel = NUTS(model, jit_compile=True, adapt_step_size=True)
hmc_posterior = MCMC(nuts_kernel, num_samples=100, warmup_steps=200) \
    .run(utt, phi)
print(EmpiricalMarginal(hmc_posterior, "mu")._get_samples_and_weights()[0])
print(hmc_posterior.marginal('mu').empirical['mu'].mean)
    label = args.label
    return nn_model, label, save_fn


if __name__ == '__main__':
    set_start_method('spawn')
    # fashion_mnist labels
    # [0: 'T-shirt', 1: 'Trouser', 2: 'Pullover', 3: 'Dress', 4: 'Coat',
    #  5: 'Sandal', 6: 'Shirt', 7: 'Sneaker', 8: 'Bag', 9: 'Ankle boot']
    parser = argparse.ArgumentParser()
    parser.add_argument('--num-samples', type=int, default=1000)
    parser.add_argument('--num-warmups', type=int, default=1000)
    parser.add_argument('--num-chains', type=int, default=1)
    parser.add_argument('--overwrite', action='store_true')
    parser.add_argument('--nn-model')
    parser.add_argument('--dataset')
    parser.add_argument('--label', type=int)
    args = parser.parse_args()

    nn_model, label, save_fn = setting(args)
    nuts = NUTS(program_ood)
    mcmc = MCMC(nuts,
                num_samples=args.num_samples,
                warmup_steps=args.num_warmups,
                num_chains=args.num_chains)
    mcmc.run(nn_model, label)
    zs = mcmc.get_samples()['z'].detach().cpu().numpy()
    np.savetxt(save_fn, zs)
import torch
import pandas as pd
import pyro
from pyro.infer.mcmc import MCMC, NUTS
from pyro.infer.abstract_infer import EmpiricalMarginal
from model4_5 import model

pyro.set_rng_seed(1234)
# Enable validation checks
pyro.enable_validation(True)

d = pd.read_csv('input/data-salary.txt').astype('float32')
d = torch.tensor(d.values)
x_data, y_data = d[:, 0], d[:, -1]

nuts_kernel = NUTS(model, adapt_step_size=True, jit_compile=True,
                   ignore_jit_warnings=True)
hmc_posterior = MCMC(nuts_kernel, num_samples=1000, num_chains=4,
                     warmup_steps=200).run(x_data, y_data)
posterior_a = EmpiricalMarginal(hmc_posterior, 'a')
posterior_b = EmpiricalMarginal(hmc_posterior, 'b')
posterior_sigma = EmpiricalMarginal(hmc_posterior, 'sigma')
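# A short follow-up sketch (assumed, using the same legacy EmpiricalMarginal
# API as above): report the posterior mean of each marginal.
print('posterior mean of a:', posterior_a.mean)
print('posterior mean of b:', posterior_b.mean)
print('posterior mean of sigma:', posterior_sigma.mean)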
def test_model_with_param(jit_compile, num_chains):
    kernel = NUTS(model_with_param, jit_compile=jit_compile,
                  ignore_jit_warnings=True)
    mcmc = MCMC(kernel, 10, num_chains=num_chains, mp_context="spawn")
    mcmc.run()
import argparse
import logging
import math
import os

import pandas as pd
import torch

import pyro
from pyro.distributions import Beta, Binomial, HalfCauchy, Normal, Pareto, Uniform
from pyro.distributions.util import logsumexp
from pyro.infer.abstract_infer import TracePredictive
from pyro.infer.mcmc import MCMC, NUTS

"""
Example has been adapted from [1]. It demonstrates how to do Bayesian inference
using NUTS (or HMC) in Pyro, and the use of some common inference utilities.

As in the Stan tutorial, this uses the small baseball dataset of Efron and
Morris [2] to estimate players' batting average, which is the fraction of times
a player got a base hit out of the number of times they went up at bat.

The dataset separates the initial 45 at-bats statistics from the remaining
season. We use the hits data from the initial 45 at-bats to estimate the
batting average for each player, and then use the remaining season's data to
validate the predictions from our models.

Three models are evaluated:
 - Complete pooling model: The success probability of scoring a hit is shared
   amongst all players.
"""
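# A minimal sketch (an assumption, not code from this example) of the
# complete-pooling model described above: a single success probability phi
# shared by all players.
def fully_pooled_sketch(at_bats, hits):
    phi = pyro.sample("phi", Uniform(0., 1.))
    num_players = at_bats.size(0)
    with pyro.plate("players", num_players):
        return pyro.sample("obs", Binomial(at_bats, phi), obs=hits)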
def synthetic(INFERENCE):
    # load configurations
    with open('model/conf.json') as f:
        configs = json.load(f)
    N_tr = configs["N_tr"]
    N_co = configs["N_co"]
    N = N_tr + N_co
    T = configs["T"]
    T0 = configs["T0"]
    d = configs["d"]
    noise_std = configs["noise_std"]
    Delta = configs["treatment_effect"]
    seed = configs["seed"]

    X_tr, X_co, Y_tr, Y_co, ATT = generate_synthetic_data(
        N_tr, N_co, T, T0, d, Delta, noise_std, seed)
    train_x_tr = X_tr[:, :T0].reshape(-1, d + 1)
    train_x_co = X_co.reshape(-1, d + 1)
    train_y_tr = Y_tr[:, :T0].reshape(-1)
    train_y_co = Y_co.reshape(-1)
    train_x = torch.cat([train_x_tr, train_x_co])
    train_y = torch.cat([train_y_tr, train_y_co])
    # treat group 1, control group 0
    train_i_tr = torch.full_like(train_y_tr, dtype=torch.long, fill_value=1)
    train_i_co = torch.full_like(train_y_co, dtype=torch.long, fill_value=0)
    train_i = torch.cat([train_i_tr, train_i_co])

    # fit = TwoWayFixedEffectModel(X_tr, X_co, Y_tr, Y_co, ATT, T0)
    # return
    # train_x, train_y, train_i = build_gpytorch_data(X_tr, X_co, Y_tr, Y_co, T0)

    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    model = MultitaskGPModel((train_x, train_i), train_y, N, likelihood)
    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    def pyro_model(x, i, y):
        model.pyro_sample_from_prior()
        output = model(x, i)
        loss = mll.pyro_factor(output, y)
        return y

    if not os.path.isdir("results"):
        os.mkdir("results")

    if INFERENCE == 'MAPLOAD':
        model.load_strict_shapes(False)
        state_dict = torch.load('results/synthetic_MAP_model_state.pth')
        model.load_state_dict(state_dict)
    elif INFERENCE == "MAP":
        # the Adam optimizer includes the GaussianLikelihood parameters
        optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
        model, likelihood = train(train_x, train_i, train_y, model,
                                  likelihood, mll, optimizer)
        torch.save(model.state_dict(),
                   'results/synthetic_' + INFERENCE + '_model_state.pth')
    else:
        nuts_kernel = NUTS(pyro_model, adapt_step_size=True)
        mcmc_run = MCMC(nuts_kernel, num_samples=num_samples,
                        warmup_steps=warmup_steps,
                        disable_progbar=smoke_test)
        mcmc_run.run(train_x, train_i, train_y)
        torch.save(model.state_dict(),
                   'results/synthetic_' + INFERENCE + '_model_state.pth')

    visualize_synthetic(X_tr, X_co, Y_tr, Y_co, ATT, model, likelihood, T0)
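# Example invocations (a sketch; num_samples, warmup_steps, and smoke_test are
# module-level globals assumed to be defined elsewhere in this script):
#
#   synthetic('MAP')      # fit by MAP and save the state dict
#   synthetic('MAPLOAD')  # reload the saved MAP state
#   synthetic('MCMC')     # run NUTS over the GP hyperparameters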
def main(args):
    pyro.set_rng_seed(args.rng_seed)
    baseball_dataset = pd.read_csv(DATA_URL, sep="\t")
    train, _, player_names = train_test_split(baseball_dataset)
    at_bats, hits = train[:, 0], train[:, 1]
    logging.info("Original Dataset:")
    logging.info(baseball_dataset)
    num_predictive_samples = args.num_samples * args.num_chains

    # (1) Full Pooling Model
    nuts_kernel = NUTS(fully_pooled)
    posterior_fully_pooled = MCMC(nuts_kernel,
                                  num_samples=args.num_samples,
                                  warmup_steps=args.warmup_steps,
                                  num_chains=args.num_chains).run(at_bats, hits)
    logging.info("\nModel: Fully Pooled")
    logging.info("===================")
    logging.info("\nphi:")
    logging.info(summary(posterior_fully_pooled, sites=["phi"],
                         player_names=player_names)["phi"])
    posterior_predictive = TracePredictive(fully_pooled,
                                           posterior_fully_pooled,
                                           num_samples=num_predictive_samples)
    sample_posterior_predictive(posterior_predictive, baseball_dataset)
    evaluate_log_predictive_density(posterior_predictive, baseball_dataset)

    # (2) No Pooling Model
    nuts_kernel = NUTS(not_pooled)
    posterior_not_pooled = MCMC(nuts_kernel,
                                num_samples=args.num_samples,
                                warmup_steps=args.warmup_steps,
                                num_chains=args.num_chains).run(at_bats, hits)
    logging.info("\nModel: Not Pooled")
    logging.info("=================")
    logging.info("\nphi:")
    logging.info(summary(posterior_not_pooled, sites=["phi"],
                         player_names=player_names)["phi"])
    posterior_predictive = TracePredictive(not_pooled,
                                           posterior_not_pooled,
                                           num_samples=num_predictive_samples)
    sample_posterior_predictive(posterior_predictive, baseball_dataset)
    evaluate_log_predictive_density(posterior_predictive, baseball_dataset)

    # (3) Partially Pooled Model
    # TODO: remove once https://github.com/uber/pyro/issues/1458 is resolved
    if "CI" not in os.environ:
        nuts_kernel = NUTS(partially_pooled)
        posterior_partially_pooled = MCMC(
            nuts_kernel,
            num_samples=args.num_samples,
            warmup_steps=args.warmup_steps,
            num_chains=args.num_chains).run(at_bats, hits)
        logging.info("\nModel: Partially Pooled")
        logging.info("=======================")
        logging.info("\nphi:")
        logging.info(summary(posterior_partially_pooled, sites=["phi"],
                             player_names=player_names)["phi"])
        posterior_predictive = TracePredictive(
            partially_pooled,
            posterior_partially_pooled,
            num_samples=num_predictive_samples)
        sample_posterior_predictive(posterior_predictive, baseball_dataset)
        evaluate_log_predictive_density(posterior_predictive, baseball_dataset)

    # (4) Partially Pooled with Logit Model
    nuts_kernel = NUTS(partially_pooled_with_logit)
    posterior_partially_pooled_with_logit = MCMC(
        nuts_kernel,
        num_samples=args.num_samples,
        warmup_steps=args.warmup_steps,
        num_chains=args.num_chains).run(at_bats, hits)
    logging.info("\nModel: Partially Pooled with Logit")
    logging.info("==================================")
    logging.info("\nSigmoid(alpha):")
    logging.info(summary(posterior_partially_pooled_with_logit,
                         sites=["alpha"],
                         player_names=player_names,
                         transforms={"alpha": lambda x: 1. / (1 + (-x).exp())})["alpha"])
    posterior_predictive = TracePredictive(partially_pooled_with_logit,
                                           posterior_partially_pooled_with_logit,
                                           num_samples=num_predictive_samples)
    sample_posterior_predictive(posterior_predictive, baseball_dataset)
    evaluate_log_predictive_density(posterior_predictive, baseball_dataset)
    bonds = torch.dist(M[2:-1], M[3:])
    with pyro.plate("bonds"):
        bond_obs = pyro.sample("bonds", dist.StudentT(1, bonds, 0.001),
                               obs=torch.tensor(3.8))

    # Add a distance restraint between first and last point
    # Standard deviation of pairwise distance
    sd = pyro.sample("sigma_dist", dist.HalfCauchy(scale=0.1))
    d = torch.dist(M[0], M[-1])
    d_obs = pyro.sample("d_obs", dist.StudentT(1, d, 0.001),
                        obs=torch.tensor(10.))


filename_pdb = ...  # output path; value elided in the source

if __name__ == "__main__":
    # Nr samples
    S = 500
    # Nr samples burn-in
    B = 70

    # Do NUTS sampling
    nuts_kernel = NUTS(model, adapt_step_size=True)
    mcmc_sampler = MCMC(nuts_kernel, num_samples=S, warmup_steps=B)
    posterior = mcmc_sampler.run()

    # Get the last sampled points
    samples = get_samples(posterior, "M")

    # Save to PDB file
    M_last = samples[S - 1]
    M = torch.cat((M_first, M_last))  # add fixed first 3 coordinates
    save_M(M, filename_pdb + ".pdb")
def init_mcmc(self, seed=42):
    self.set_seed(seed)
    kernel = NUTS(self.model)
    self.mcmc = MCMC(kernel,
                     num_samples=self.num_samples,
                     warmup_steps=self.warmup_steps)
    print("Initialized MCMC with NUTS kernel")
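# A hedged companion sketch (not from the source): a matching run step that
# forwards the model's arguments and returns the posterior draws.
def run_mcmc(self, *model_args):
    if getattr(self, "mcmc", None) is None:
        self.init_mcmc()
    self.mcmc.run(*model_args)
    return self.mcmc.get_samples()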
)

# In[59]:

plot_uq(hmc_posterior_samples, X, Xnew, "HMC")

# ## NUTS

# In[60]:

## NUTS ###
pyro.clear_param_store()
pyro.set_rng_seed(2)
nuts = MCMC(NUTS(gpc,
                 target_accept_prob=0.8,
                 max_tree_depth=10,
                 jit_compile=True),
            num_samples=500,
            warmup_steps=500)
nuts.run(X, y.double())
nuts_posterior_samples = nuts.get_samples()

# In[61]:

plot_uq(nuts_posterior_samples, X, Xnew, "NUTS")

# ## ADVI

# In[81]:
categories, words = torch.stack(categories), torch.stack(words)

# split into supervised data and unsupervised data
supervised_categories = categories[:num_supervised_data]
supervised_words = words[:num_supervised_data]
unsupervised_words = words[num_supervised_data:]


def forward_log_prob(prev_log_prob, curr_word, transition_log_prob,
                     emission_log_prob):
    log_prob = (emission_log_prob[:, curr_word] + transition_log_prob
                + prev_log_prob.unsqueeze(dim=1))
    return log_prob.logsumexp(dim=0)


def unsupervised_hmm(words):
    with pyro.plate("prob_plate", num_categories):
        transition_prob = pyro.sample("transition_prob",
                                      dist.Dirichlet(transition_prior))
        emission_prob = pyro.sample("emission_prob",
                                    dist.Dirichlet(emission_prior))

    transition_log_prob = transition_prob.log()
    emission_log_prob = emission_prob.log()
    log_prob = emission_log_prob[:, words[0]]
    for t in range(1, len(words)):
        log_prob = forward_log_prob(log_prob, words[t],
                                    transition_log_prob, emission_log_prob)
    prob = log_prob.logsumexp(dim=0).exp()
    # a trick to inject an additional log_prob into the model's log_prob
    pyro.sample("forward_prob", dist.Bernoulli(prob), obs=torch.tensor(1.))


nuts_kernel = NUTS(unsupervised_hmm, jit_compile=True,
                   ignore_jit_warnings=True)
mcmc = MCMC(nuts_kernel, num_samples=100)
mcmc.run(unsupervised_words)
trace_transition_prob = mcmc.get_samples()["transition_prob"]
print(trace_transition_prob)
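# A toy smoke test (assumed setup, not from the source): the globals the model
# reads -- num_categories, transition_prior, emission_prior -- are filled in
# with small placeholder values so unsupervised_hmm can be exercised end to end.
num_categories = 3
num_words = 5
transition_prior = torch.ones(num_categories)
emission_prior = torch.ones(num_words)
toy_words = torch.randint(num_words, (10,))
toy_kernel = NUTS(unsupervised_hmm, jit_compile=False)
toy_mcmc = MCMC(toy_kernel, num_samples=10, warmup_steps=10)
toy_mcmc.run(toy_words)
print(toy_mcmc.get_samples()["transition_prob"].shape)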