def main(args):
    """Run importance sampling on the observed data and print a summary.

    :param args: namespace providing ``num_samples``, the number of
        importance samples to draw.
    """
    # No guide is supplied, so the prior serves as the proposal distribution.
    sampler = Importance(model, guide=None, num_samples=args.num_samples)

    # Turn the raw execution traces into an empirical marginal over the
    # model's return value (mu).
    print("doing importance sampling...")
    traces = sampler.run(observed_data)
    emp_marginal = EmpiricalMarginal(traces)

    # Summary statistics over the posterior samples.
    posterior_mean = emp_marginal.mean
    posterior_std_dev = emp_marginal.variance.sqrt()

    summary = (posterior_mean.item(), posterior_std_dev.item())
    print("the coefficient of friction inferred by pyro is %.3f +- %.3f" % summary)

    # Because the simulator uses a finite step size, simulated descent times
    # will not exactly match the analytic result: the first two numbers
    # printed below should agree closely with each other but be
    # systematically off from the third.
    print("the mean observed descent time in the dataset is: %.4f seconds" % observed_mean)

    simulated_time = simulate(posterior_mean).item()
    print("the (forward) simulated descent time for the inferred (mean) mu is: %.4f seconds"
          % simulated_time)

    analytic_time = analytic_T(posterior_mean.item())
    analytic_template = ("disregarding measurement noise, elementary calculus gives the descent time\n" +
                         "for the inferred (mean) mu as: %.4f seconds")
    print(analytic_template % analytic_time)
def policy_control_as_inference_like(env, *, trajectory_model, agent_model, log=False):
    """policy_control_as_inference_like

    Control-as-inference-like policy which "maximizes"
    $\\Pr(A_0 \\mid S_0, high G)$.  This is not standard CaI: rather than
    conditioning on G, $\\alpha G$ is used as a likelihood factor on the
    sampled traces.

    :param env: OpenAI Gym environment
    :param trajectory_model: trajectory probabilistic program
    :param agent_model: agent's probabilistic program
    :param log: boolean; if True, print log info
    :return: one action drawn from the empirical marginal of site ``A_0``
    """
    sampler = Importance(trajectory_model, num_samples=args.num_samples)
    traces = sampler.run(env, agent_model=agent_model, factor_G=True)
    action_marginal = EmpiricalMarginal(traces, 'A_0')

    if not log:
        return action_marginal.sample()

    # Build a normalised histogram of sampled actions for display only.
    draws = action_marginal.sample((args.num_samples, ))
    counts = Counter(draws.tolist())
    hist = []
    for action_idx in range(env.action_space.n):
        hist.append(counts[action_idx] / args.num_samples)

    print('policy:')
    print(tabulate([hist], headers=env.actions, tablefmt='fancy_grid'))

    return action_marginal.sample()
def main(args):
    """Infer mu by importance sampling and report it with descent times.

    :param args: namespace providing ``num_samples``, the number of
        importance samples to draw.
    """
    # The prior is used as the proposal distribution (guide=None).
    importance_sampler = Importance(model, guide=None,
                                    num_samples=args.num_samples)

    # Posterior samples of mu (the model's return value) are recovered from
    # the raw execution traces produced by the importance sampler.
    print("doing importance sampling...")
    emp_marginal = EmpiricalMarginal(importance_sampler.run(observed_data))

    # Statistics over the posterior samples.
    posterior_mean = emp_marginal.mean
    posterior_std_dev = emp_marginal.variance.sqrt()

    # Report the inferred value with its uncertainty.
    mu_hat = posterior_mean.item()
    mu_hat_err = posterior_std_dev.item()
    print("the coefficient of friction inferred by pyro is %.3f +- %.3f" %
          (mu_hat, mu_hat_err))

    # Given the finite step size in the simulator, the simulated descent
    # times will not precisely match the analytic result.  The first two
    # numbers reported below should match each other closely but will be
    # systematically off from the third.
    print("the mean observed descent time in the dataset is: %.4f seconds" %
          observed_mean)

    forward_time = simulate(posterior_mean).item()
    print(
        "the (forward) simulated descent time for the inferred (mean) mu is: %.4f seconds"
        % forward_time)

    closed_form_time = analytic_T(posterior_mean.item())
    print((
        "disregarding measurement noise, elementary calculus gives the descent time\n"
        + "for the inferred (mean) mu as: %.4f seconds") % closed_form_time)
def get_log_marginal_density(loader):
    """Estimate the average log marginal density over ``loader``.

    For every mini-batch an importance sampler is run with the model's own
    guide as proposal; the log of the mean importance weight is accumulated
    in a running average weighted by batch size.

    :param loader: iterable yielding 4-tuples whose second element is the
        response tensor and whose fourth element is the mask; it must
        support ``len()``.
    :return: the running average held by the meter after the last batch.
    """
    model.eval()
    meter = AverageMeter()

    # Use the progress bar as a context manager so it is closed even when an
    # iteration raises (the previous explicit close() only ran on success).
    with tqdm(total=len(loader)) as pbar, torch.no_grad():
        for _, response, _, mask in loader:
            mb = response.size(0)
            response = response.to(device)
            mask = mask.long().to(device)

            posterior = Importance(
                model.model,
                guide=model.guide,
                num_samples=args.num_posterior_samples,
            )
            posterior = posterior.run(response, mask)

            # log( mean_i w_i ) = logsumexp(log w) - log(num_weights)
            log_weights = torch.stack(posterior.log_weights)
            marginal = torch.logsumexp(log_weights, 0) - math.log(
                log_weights.size(0))

            meter.update(marginal.item(), mb)
            pbar.update()
            pbar.set_postfix({'Marginal': meter.avg})

    print('====> Marginal: {:.4f}'.format(meter.avg))
    return meter.avg
def test_add_factor(self):
    """
    Check the log-mass reported for a trace of a model that only calls
    add_factor().
    """
    val = torch.tensor(-1.0)
    expected_logmass = torch.tensor(-100.0)

    def model():
        # alpha is 100.0
        add_factor(val, 'exp_util_test')

    sampler = Importance(model, num_samples=1)
    posterior = sampler.run()

    # Each item yielded by _traces() carries the log-mass at index 1.
    first_item = next(posterior._traces())
    actual_logmass = first_item[1]

    self.assertEqual(actual_logmass, expected_logmass)
def infer_actions(self, state):
    """Importance-sample the action model at ``state``.

    :param state: state forwarded to ``self.action_model``.
    :return: the ``(values, probabilities)`` pair produced by
        ``self.infer_prob`` for the candidate values 0 and 1.
    """
    num_samples = 50
    sampler = Importance(self.action_model, num_samples=num_samples)
    action_posterior = sampler.run(state)

    candidates = torch.tensor((0, 1))
    values, probabilities = self.infer_prob(action_posterior, candidates,
                                            num_samples)
    return values, probabilities
def run_IS(model, guide, guess, samples=100, posterior_var='weight'):
    """Run importance sampling and draw from the marginal of one site.

    :param model: model passed to ``Importance``.
    :param guide: guide (proposal) passed to ``Importance``.
    :param guess: argument forwarded to the sampler's ``run``.
    :param samples: number of importance samples and of marginal draws.
    :param posterior_var: name of the site whose marginal is sampled.
    :return: numpy array holding ``samples`` scalar draws.
    """
    sampler = Importance(model, guide=guide, num_samples=samples)
    is_posterior = sampler.run(guess)
    is_marginal = pyro.infer.EmpiricalMarginal(is_posterior, posterior_var)

    # Draw one scalar at a time from the empirical marginal.
    draws = []
    for _ in range(samples):
        draws.append(is_marginal().detach().item())
    is_samples = np.asarray(draws)

    print("mean: {0:.2f}, std: {1:.2f}".format(is_samples.mean(), is_samples.std()))
    return is_samples
def infer():
    """Repeatedly importance-sample ``model``, feeding each marginal back in.

    Fifty rounds are run.  Round ``i`` uses a timestamp of ``30 + 10 * i``
    and ``50 * (i + 1)`` samples, and receives the previous round's
    empirical marginal (``None`` on the first round) as its first argument.

    :return: the empirical marginal produced by the final round.
    """
    action_sampler = env.action_space
    trajectory_sampler = None

    for round_idx in range(50):
        horizon = 30 + 10 * round_idx
        n_draws = 50 * (round_idx + 1)

        sampler = Importance(model, num_samples=n_draws)
        posterior = sampler.run(trajectory_sampler, action_sampler, horizon)
        trajectory_sampler = EmpiricalMarginal(posterior)

        # Empirical frequency of every distinct sampled trajectory.
        draws = trajectory_sampler.sample((n_draws, 1))
        flat_draws = torch.flatten(draws, end_dim=1)
        possible_vals, counts = torch.unique(input=flat_draws,
                                             sorted=True,
                                             return_counts=True,
                                             dim=0)
        probs = torch.true_divide(counts, n_draws)
        assert torch.allclose(torch.sum(probs), torch.tensor(1.0))

        print("possible_Traj")
        print(possible_vals)
        print(probs)
        print("in {}th inference, sample traj is".format(round_idx),
              trajectory_sampler.sample())

    return trajectory_sampler
def _vi_ape(model, design, observation_labels, target_labels, vi_parameters, is_parameters, y_dist=None):
    """Estimate the expected posterior entropy using SVI per sampled ``y``.

    :param model: model accepting ``design``.
    :param design: design passed to the model, SVI steps and entropy helper.
    :param observation_labels: site names; the i-th label is conditioned on
        ``y[i, ...]``.
    :param target_labels: whitelist passed to ``mean_field_entropy``.
    :param dict vi_parameters: must contain ``'num_steps'`` and ``'guide'``;
        every entry except ``'num_steps'`` is forwarded to ``SVI``.  The
        caller's dict is not modified.
    :param dict is_parameters: keyword arguments for ``Importance``, used
        only when ``y_dist`` is None.
    :param y_dist: optional distribution to sample ``y`` from; when None an
        empirical marginal over ``observation_labels`` is built by
        importance sampling.
    :return: mean of the entropy over the ``Search`` posterior.
    """
    # Pop from a shallow copy: mutating the caller's dict would make a second
    # call with the same dict fail with KeyError on 'num_steps'.
    vi_parameters = dict(vi_parameters)
    svi_num_steps = vi_parameters.pop('num_steps')

    def posterior_entropy(y_dist, design):
        # Important that y_dist is sampled *within* the function
        y = pyro.sample("conditioning_y", y_dist)
        y_dict = {label: y[i, ...] for i, label in enumerate(observation_labels)}
        conditioned_model = pyro.condition(model, data=y_dict)
        svi = SVI(conditioned_model, **vi_parameters)
        with poutine.block():
            for _ in range(svi_num_steps):
                svi.step(design)
        # Recover the entropy
        with poutine.block():
            guide = vi_parameters["guide"]
            entropy = mean_field_entropy(guide, [design], whitelist=target_labels)
        return entropy

    if y_dist is None:
        y_dist = EmpiricalMarginal(Importance(model, **is_parameters).run(design),
                                   sites=observation_labels)

    # Calculate the expected posterior entropy under this distn of y
    loss_dist = EmpiricalMarginal(Search(posterior_entropy).run(y_dist, design))
    loss = loss_dist.mean
    return loss
def _laplace_vi_ape(model, design, observation_labels, target_labels, guide, loss, optim, num_steps,
                    final_num_samples, y_dist=None):
    """Estimate the expected posterior entropy with a finalized guide.

    For every sampled ``y`` the model is conditioned on it, ``num_steps``
    SVI steps are taken, the guide is finalized with the resulting loss and
    its mean-field entropy over ``target_labels`` is returned.

    :param y_dist: optional distribution for ``y``; when None it is built by
        importance sampling ``model`` with ``final_num_samples`` samples.
    :return: mean of the entropy over the ``Search`` posterior.
    """

    def posterior_entropy(y_dist, design):
        # y must be sampled *inside* this function.
        y = pyro.sample("conditioning_y", y_dist)
        observed = {}
        for idx, label in enumerate(observation_labels):
            observed[label] = y[idx, ...]
        conditioned_model = pyro.condition(model, data=observed)

        # SVI is used here only to drive the MAP optimization.
        guide.train()
        optimizer = SVI(conditioned_model, guide=guide, loss=loss, optim=optim)
        with poutine.block():
            for _ in range(num_steps):
                optimizer.step(design)

        # Recover the entropy from the finalized guide.
        with poutine.block():
            final_loss = loss(conditioned_model, guide, design)
            guide.finalize(final_loss, target_labels)
            return mean_field_entropy(guide, [design], whitelist=target_labels)

    if y_dist is None:
        importance_run = Importance(model, num_samples=final_num_samples).run(design)
        y_dist = EmpiricalMarginal(importance_run, sites=observation_labels)

    # Expected posterior entropy under this distribution of y.
    search_run = Search(posterior_entropy).run(y_dist, design)
    return EmpiricalMarginal(search_run).mean
def infer_utility(self, state, action, time_left, n_samples = 10):
    """
    Infer the values and probabilities of the utility.

    Results are cached in the Pyro param store under a name built from the
    integer state, action and remaining time, so each combination is only
    computed once.

    Input:  - current state, action, time_left
    Output: - values and probabilities for the utility values 0 and 1
    Calls:  utility_model()
    """
    cache_key = 'util_state{}_action{}_time{}'.format(state.int(), action.int(), time_left.int())
    store = pyro.get_param_store()

    # Cache hit: row 0 holds the values, row 1 the probabilities.
    if cache_key in store.keys():
        cached = store.get_param(cache_key)
        return cached[0], cached[1]

    # Cache miss: run importance sampling on the utility model.
    sampler = Importance(self.utility_model, num_samples = n_samples)
    utility_posterior = sampler.run(state, action, time_left)
    vals, probs = infer_probs(utility_posterior, possible_vals = torch.tensor([0.,1.]))

    # Store values and probabilities stacked as two rows for later calls.
    stacked = torch.cat((vals.unsqueeze(0), probs.unsqueeze(0)), dim=0)
    pyro.param(cache_key, stacked)

    return vals, probs
def infer_actions(self, state, time_left, n_samples = 10):
    """
    Infer the probability of each action maximizing utility.

    Results are cached in the Pyro param store under a name built from the
    integer state and remaining time.

    Input:  - current state
            - time left
    Output: - values and probabilities for the action values 0, 1 and 2
    """
    cache_key = 'actions_values_at_state_{}_time{}'.format(state.int(), time_left.int())
    store = pyro.get_param_store()

    # Cache hit: row 0 holds the values, row 1 the probabilities.
    if cache_key in store.keys():
        cached = store.get_param(cache_key)
        return cached[0], cached[1]

    # Cache miss: run importance sampling on the action model.
    sampler = Importance(self.action_model, num_samples = n_samples)
    action_posterior = sampler.run(state, time_left)
    vals, probs = infer_probs(action_posterior, possible_vals = torch.tensor([0.,1.,2.]))

    # Store values and probabilities stacked as two rows for later calls.
    stacked = torch.cat((vals.unsqueeze(0), probs.unsqueeze(0)), dim=0)
    pyro.param(cache_key, stacked)

    return vals, probs
def update_belief(self, state):
    """Replace ``self.belief`` with a marginal inferred from observations.

    Nothing happens when ``self.observe(state)`` yields a falsy result.

    :param state: state at which observations are taken.
    """
    # NOTE: a caching strategy could be used here to speed up running time.
    observations = self.observe(state)  # observations at state
    if not observations:
        return

    num_samples = 20
    sampler = Importance(self.belief_model, num_samples = num_samples)
    belief_posterior = sampler.run(self.belief, observations)
    self.belief = EmpiricalMarginal(belief_posterior)
def infer_utility(self, state, action):
    """Importance-sample the utility model for a state/action pair.

    :param state: state forwarded to ``self.utility_model``.
    :param action: action forwarded to ``self.utility_model``.
    :return: the ``(values, probabilities)`` pair produced by
        ``self.infer_prob`` for the candidate utilities -10, 6 and 8.
    """
    num_samples = 100
    sampler = Importance(self.utility_model, num_samples=num_samples)
    utility_posterior = sampler.run(state, action)

    candidates = torch.tensor((-10, 6, 8))
    values, probabilities = self.infer_prob(utility_posterior, candidates,
                                            num_samples)
    return values, probabilities
def softmax_agent_model(env):
    """softmax_agent_model

    Softmax agent model; performs inference to estimate $Q^\\pi(s, a)$, then
    uses pyro.factor to modify the trace log-likelihood.

    A policy vector (one action index per state) is sampled from a
    categorical with equal weights, the trajectory model is
    importance-sampled under that policy, and the mean of site ``G`` scaled
    by ``args.alpha`` is added as a factor.

    :param env: OpenAI Gym environment
    :return: the sampled policy vector
    """
    # Equal weights for every (state, action) entry; one categorical per state.
    policy_probs = torch.ones(env.state_space.n, env.action_space.n)
    policy_vector = pyro.sample('policy_vector', Categorical(policy_probs))

    # Nested inference: estimate the return obtained by following this policy.
    inference = Importance(trajectory_model, num_samples=args.num_samples)
    posterior = inference.run(env, lambda state: policy_vector[state])
    Q = EmpiricalMarginal(posterior, 'G').mean

    pyro.factor('factor_Q', args.alpha * Q)

    return policy_vector
def softmax_agent_model(t, env, *, trajectory_model):
    """softmax_agent_model

    Softmax agent model; performs inference to estimate $Q^\\pi(s, a)$, then
    uses pyro.factor to modify the trace log-likelihood.

    An action is sampled at site ``A_{t}`` from a categorical with equal
    weights, the trajectory model is importance-sampled for that action, and
    the mean of site ``G_{t}`` scaled by ``args.alpha`` is added as a factor.

    :param t: time-step
    :param env: OpenAI Gym environment
    :param trajectory_model: trajectory probabilistic program
    :return: the sampled action
    """
    # Equal weights over all actions.
    action_probs = torch.ones(env.action_space.n)
    action = pyro.sample(f'A_{t}', Categorical(action_probs))

    # Nested inference: estimate the return obtained after taking `action`.
    inference = Importance(trajectory_model, num_samples=args.num_samples)
    posterior = inference.run(t, env, action)
    Q = EmpiricalMarginal(posterior, f'G_{t}').mean

    pyro.factor(f'softmax_{t}', args.alpha * Q)

    return action
def policy(t, env, *, trajectory_model, log=False):
    """policy

    Importance-sample the softmax agent model at time-step ``t`` and draw an
    action from the empirical marginal of site ``A_{t}``.

    :param t: time-step
    :param env: OpenAI Gym environment
    :param trajectory_model: trajectory probabilistic program
    :param log: boolean; if True, print log info
    :return: one sampled action
    """
    sampler = Importance(softmax_agent_model, num_samples=args.num_samples)
    traces = sampler.run(t, env, trajectory_model=trajectory_model)
    action_marginal = EmpiricalMarginal(traces, f'A_{t}')

    if not log:
        return action_marginal.sample()

    # Normalised histogram of sampled actions, for display only.
    draws = action_marginal.sample((args.num_samples, ))
    counts = Counter(draws.tolist())
    hist = []
    for action_idx in range(env.action_space.n):
        hist.append(counts[action_idx] / args.num_samples)

    print('policy:')
    print(tabulate([hist], headers=env.actions, tablefmt='fancy_grid'))

    return action_marginal.sample()
def policy(env, log=False):
    """policy

    Importance-sample the softmax agent model, draw a policy vector from the
    empirical marginal of site ``policy_vector`` and look up the action for
    the environment's current state.

    :param env: OpenAI Gym environment
    :param log: boolean; if True, print log info
    :return: the entry of the sampled policy vector at ``env.state``
    """
    sampler = Importance(softmax_agent_model, num_samples=args.num_samples)
    traces = sampler.run(env)
    policy_marginal = EmpiricalMarginal(traces, 'policy_vector')

    if log:
        # Histogram of the actions the sampled policies pick in this state.
        policy_draws = policy_marginal.sample((args.num_samples, ))
        action_draws = policy_draws[:, env.state]
        counts = Counter(action_draws.tolist())
        hist = []
        for action_idx in range(env.action_space.n):
            hist.append(counts[action_idx] / args.num_samples)

        print('policy:')
        print(tabulate([hist], headers=env.actions, tablefmt='fancy_grid'))

    chosen_policy = policy_marginal.sample()
    return chosen_policy[env.state]
def expected_reward(Q_function, action, env, i):
    """Return the posterior mean of ``Q_function``, cached per (env.s, i).

    The Pyro param store is used as a cache, as an optimization: the value
    is computed once per ``(env.s, i)`` pair and read back afterwards.

    :param Q_function: model passed to ``Importance``.
    :param action: action forwarded to ``Q_function``.
    :param env: environment; its ``s`` attribute is part of the cache key.
    :param i: index forwarded to ``Q_function`` and used in the cache key.
    :return: the cached or freshly computed posterior mean.
    """
    cache_key = 'posterior_reward_state{}_{}'.format(env.s, i)
    store = pyro.get_param_store()
    if cache_key in store.keys():
        return store.get_param(cache_key)

    # This gets slower as num_samples is increased.
    posterior = Importance(Q_function, num_samples=30).run(action, env, i)

    # Sample the marginal and average; the draws are assumed to all be equal.
    draws = EmpiricalMarginal(posterior).sample((30, 1)).float()
    posterior_mean = torch.mean(draws)

    pyro.param(cache_key, posterior_mean)
    return posterior_mean
def test_action_model_add_logmass(self):
    """
    Check that, under importance sampling, the action model's trace log-mass
    equals 100 times the (stubbed) expected utility.
    """
    dummy_utility = torch.tensor(1.0)
    state = torch.tensor(1.0)
    time_left = torch.tensor(0.0)

    agent = Agent()

    # Stub out the expected utility so it always yields the dummy value.
    def dummy_utility_func(state, action, time_left):
        return dummy_utility

    agent.expected_utility = dummy_utility_func

    sampler = Importance(agent.action_model, num_samples=1)
    posterior = sampler.run(state, time_left)

    # Each item yielded by _traces() carries the log-mass at index 1.
    first_item = next(posterior._traces(state, time_left))
    actual_logmass = first_item[1]
    expected_logmass = 100 * dummy_utility

    self.assertEqual(actual_logmass, expected_logmass)
def main():
    """
    Run the agent using pyro's EmpiricalMarginal and Importance, as opposed
    to main.py which uses a custom loop to step the agent.

    This routine runs, but is less flexible for visualisation because the
    samples in the posterior are private.  Passing the posterior back in as
    the prior did not yield clear results, although it did step the model
    forward in the expected direction.  This should be looked into further.

    :return:
    """
    posterior = None
    sensor = build_observations()

    agent = Agent(x=0., y=500.)
    agent.pick_destination(doors=environment.doors)

    # Inference algorithm: as far as is understood, it simply samples the
    # stochastic function and builds a distribution.
    infer = Importance(model=agent.step, num_samples=1000)

    for step in range(n_steps):
        calibrate = (step % 10) == 0
        if calibrate:
            # Every 10th step, assimilate an observation.
            obs = sensor.aggregate_obs(step)
            print('\nAssimilating_Observation at Step {}'.format(step))
            sensor.print_detail(step)
        else:
            # Otherwise make no observation and just run the model forward
            # from the agent's internal position.
            obs = None

        # The marginal exposes attributes such as mean and std of the
        # posterior, which is passed back in on the next iteration.
        # NOTE(review): the flattened source is ambiguous about whether this
        # assignment was live or commented out; it is treated as live because
        # `infer` and `obs` are otherwise unused - confirm against the
        # original file.
        traces = infer.run(posterior=posterior, obs=obs)
        posterior = pyro.infer.EmpiricalMarginal(traces, sites=['xy'])

        print_agent_loc(agent, step)
        print_posterior(posterior)
def infer_Q(env, action, *, trajectory_model, agent_model, log=False):
    """infer_Q

    Infer Q(state, action) via pyro's importance sampling, with site ``A_0``
    of the trajectory model intervened on to equal ``action``.

    :param env: OpenAI Gym environment
    :param action: integer action
    :param trajectory_model: trajectory probabilistic program
    :param agent_model: agent's probabilistic program
    :param log: boolean; if True, print log info
    :return: mean of the empirical marginal of site ``G``
    """
    intervened_model = pyro.do(trajectory_model, {'A_0': torch.as_tensor(action)})
    sampler = Importance(intervened_model, num_samples=args.num_samples)
    posterior = sampler.run(env, agent_model=agent_model)
    Q = EmpiricalMarginal(posterior, 'G').mean

    if log:
        print(f'Q({env.actions[action]}) = {Q.item()}')

    return Q
def infer(self, state, time_left, variable, n_samples=10):
    """
    Agent inference method: backwards from the simulation evidence.

    The result for each (variable, state, time_left) combination is cached
    in the Pyro param store and reused on later calls.
    """
    ## OR ADD ACTION (as before)???
    cache_key = 'posterior_var{}_state{}_time{}'.format(variable, state.int(), time_left.int())
    store = pyro.get_param_store()

    # Already computed: serve it from the param store.
    if cache_key in store.keys():
        return store.get_param(cache_key)

    # Otherwise run importance sampling on the simulation.
    sampler = Importance(self.simulate, num_samples = n_samples)
    imp_sampling_posterior = sampler.run(state,time_left,variable)
    variable_posterior = sample_and_count(imp_sampling_posterior,variable)

    # Save the result for later calls.
    pyro.param(cache_key, variable_posterior)
    return variable_posterior
def infer_Q(env, action, infer_type='intervention', *, agent_model):
    """infer_Q

    Infer Q(state, action) via pyro's importance sampling, via conditioning
    or intervention.

    :param env: OpenAI Gym environment
    :param action: integer action
    :param infer_type: type of inference; 'none', 'condition', or
        'intervention'
    :param agent_model: agent's probabilistic program
    :return: mean of the empirical marginal of site ``G``
    :raises ValueError: if ``infer_type`` is not one of the accepted values
    """
    if infer_type not in ('intervention', 'condition', 'none'):
        # The f-prefix was missing, so the offending value was never shown.
        raise ValueError(f'Invalid inference type {infer_type}')

    if infer_type == 'intervention':
        model = pyro.do(trajectory_model, {'A_0': torch.tensor(action)})
    elif infer_type == 'condition':
        model = pyro.condition(trajectory_model, {'A_0': torch.tensor(action)})
    else:  # infer_type == 'none'
        model = trajectory_model

    posterior = Importance(model, num_samples=args.num_samples).run(
        env, agent_model=agent_model)
    return EmpiricalMarginal(posterior, 'G').mean
def sample(vae, training_shape, num_samples=1):
    """Unfinished sampling helper; currently ends at a debugger breakpoint.

    An importance sampler over ``vae.model`` is constructed and a boolean
    mask is computed, but neither is used or returned yet.

    NOTE(review): this looks like leftover debugging code - confirm the
    intended behaviour before relying on it.
    """
    posterior = Importance(vae.model, num_samples=num_samples)

    positions = torch.arange(training_shape.float().max())
    # NOTE(review): if `.tolist()` returns a plain Python list here, the
    # following `.float()` will raise AttributeError - verify.
    lengths = training_shape.dataset.data.tolist().float().unsqueeze(-1)
    mask = positions < lengths

    set_trace()
def vi_ape(model, design, observation_labels, target_labels, vi_parameters, is_parameters, y_dist=None):
    """Estimates the average posterior entropy (APE) loss function using
    variational inference (VI).

    The APE loss function estimated by this method is defined as

        :math:`APE(d)=E_{Y\\sim p(y|\\theta, d)}[H(p(\\theta|Y, d))]`

    where :math:`H[p(x)]` is the `differential entropy
    <https://en.wikipedia.org/wiki/Differential_entropy>`_.
    The APE is related to expected information gain (EIG) by the equation

        :math:`EIG(d)=H[p(\\theta)]-APE(d)`

    in particular, minimising the APE is equivalent to maximising EIG.

    :param function model: A pyro model accepting `design` as only argument.
    :param torch.Tensor design: Tensor representation of design
    :param list observation_labels: A subset of the sample sites
        present in `model`. These sites are regarded as future observations
        and other sites are regarded as latent variables over which a
        posterior is to be inferred. A single string is also accepted.
    :param list target_labels: A subset of the sample sites over which the
        posterior entropy is to be measured. A single string is also accepted.
    :param dict vi_parameters: Variational inference parameters which should
        include: `optim`: an instance of :class:`pyro.Optim`, `guide`: a guide
        function compatible with `model`, `num_steps`: the number of VI steps
        to make, and `loss`: the loss function to use for VI
    :param dict is_parameters: Importance sampling parameters for the
        marginal distribution of :math:`Y`. May include `num_samples`: the
        number of samples to draw from the marginal.
    :param pyro.distributions.Distribution y_dist: (optional) the distribution
        assumed for the response variable :math:`Y`
    :return: Loss function estimate
    :rtype: `torch.Tensor`
    """
    # Promote single labels to one-element lists.
    if isinstance(observation_labels, str):
        observation_labels = [observation_labels]
    if isinstance(target_labels, str):
        target_labels = [target_labels]

    def posterior_entropy(y_dist, design):
        # y must be sampled *inside* this function.
        y = pyro.sample("conditioning_y", y_dist)
        observed = {}
        for idx, label in enumerate(observation_labels):
            observed[label] = y[idx, ...]
        conditioned_model = pyro.condition(model, data=observed)
        SVI(conditioned_model, **vi_parameters).run(design)
        # Recover the entropy from the fitted guide.
        fitted_guide = vi_parameters["guide"]
        return mean_field_guide_entropy(fitted_guide, [design],
                                        whitelist=target_labels)

    if y_dist is None:
        importance_run = Importance(model, **is_parameters).run(design)
        y_dist = EmpiricalMarginal(importance_run, sites=observation_labels)

    # Expected posterior entropy under this distribution of y.
    search_run = Search(posterior_entropy).run(y_dist, design)
    return EmpiricalMarginal(search_run).mean
def infer(self, init_state, timeLeft):
    """Importance-sample ``self.model`` and summarise the result.

    :param init_state: initial state forwarded to the model.
    :param timeLeft: remaining time forwarded to the model.
    :return: the ``(values, probabilities)`` pair produced by
        ``self.infer_prob``.
    """
    num_samples = 1000
    sampler = Importance(self.model, num_samples = num_samples)
    action_posterior = sampler.run(init_state, timeLeft)

    values, probabilities = self.infer_prob(posterior=action_posterior,
                                            num_samples=num_samples)
    return values, probabilities
def posterior_latent(self, x, num_traces=100, num_samples=100):
    """Draw ``num_samples`` values from the importance-sampled marginal at ``x``.

    :param x: input forwarded to the marginal on every draw.
    :param num_traces: third positional argument given to ``Importance``.
    :param num_samples: number of draws to stack.
    :return: tensor stacking the individual draws along a new first dimension.
    """
    sampler = Importance(self.pyro_model, self.pyro_guide, num_traces)
    marginal = Marginal(sampler)

    draws = []
    for _ in range(num_samples):
        draws.append(marginal(x))
    return torch.stack(draws)
def infer(self, model, noise):
    """Run a 2000-sample importance sampler over ``model`` with ``noise``.

    :param model: model passed to ``Importance``.
    :param noise: argument forwarded to the sampler's ``run``.
    :return: whatever ``Importance.run`` returns.
    """
    sampler = Importance(model, num_samples=2000)
    posterior = sampler.run(noise)
    return posterior