def _log_prob_with_subsetting(self,
                              obs: Tensor,
                              group_idx: Selector,
                              time_idx: Selector,
                              measure_idx: Selector,
                              **kwargs) -> Tensor:
    """Log-probability of a subset of ``obs`` under the matching subset of the predictions.

    The selectors are first validated by ``self._check_lp_sub_input`` and then
    turned into index objects by ``bmat_idx``: one over (group, time, measure)
    for the means and observations, and one over (group, time, measure,
    measure) for the covariances.

    :param obs: Observed values; indexed with the same selector as ``self.predictions``.
    :param group_idx: Selector for the group axis.
    :param time_idx: Selector for the time axis.
    :param measure_idx: Selector for the measure axis (applied to both covariance axes).
    :param kwargs: Accepted for interface compatibility; not used here.
    :return: ``MultivariateNormal.log_prob`` of the selected observations.
    """
    self._check_lp_sub_input(group_idx, time_idx)

    mean_selector = bmat_idx(group_idx, time_idx, measure_idx)
    cov_selector = bmat_idx(group_idx, time_idx, measure_idx, measure_idx)

    subset_mean = self.predictions[mean_selector]
    subset_cov = self.prediction_uncertainty[cov_selector]
    subset_dist = MultivariateNormal(subset_mean, subset_cov)
    return subset_dist.log_prob(obs[mean_selector])
def choose_action(self, observation, memory):
    """Sample an action for ``observation`` and record the step in ``memory``.

    The actor output is used as the mean of a multivariate normal whose
    covariance is the diagonal matrix built from ``self.agent_action``
    (moved to the module-level ``device``).

    :param observation: Input passed to ``self.actor``.
    :param memory: Object with ``observations``, ``actions`` and ``log_probs``
        lists; one entry is appended to each.
    :return: The sampled action, detached from the graph.
    """
    mean = self.actor(observation)
    covariance = torch.diag(self.agent_action).to(device)
    policy = MultivariateNormal(mean, covariance)

    sampled = policy.sample()
    sampled_log_prob = policy.log_prob(sampled)

    # Store the transition pieces needed later for the policy update.
    memory.observations.append(observation)
    memory.actions.append(sampled)
    memory.log_probs.append(sampled_log_prob)

    return sampled.detach()
def act(self, state):
    """Sample an action from the current policy.

    For a continuous action space the actor output is the mean of a
    multivariate normal with covariance ``diag(self.action_var)`` (with a
    leading broadcast dimension). Otherwise the actor output is used as the
    probabilities of a categorical distribution.

    :param state: Input passed to ``self.actor``.
    :return: ``(action, log_prob)``, both detached.
    """
    continuous = self.has_continuous_action_space
    actor_out = self.actor(state)

    if continuous:
        covariance = torch.diag(self.action_var).unsqueeze(dim=0)
        policy = MultivariateNormal(actor_out, covariance)
    else:
        policy = Categorical(actor_out)

    sampled = policy.sample()
    sampled_logprob = policy.log_prob(sampled)
    return sampled.detach(), sampled_logprob.detach()
def evaluate(self, state, action):
    '''Score ``action`` under the policy for ``state``.

    ``self.forward`` supplies the action mean (first output) and the state
    value (third output); its second output is ignored. The covariance is
    diagonal, built from ``self.action_var`` expanded to the mean's shape.

    Returns ``(log_prob, squeezed_state_value, entropy)``.
    '''
    mean, _, value = self.forward(state)
    variances = self.action_var.expand_as(mean)
    policy = MultivariateNormal(mean, torch.diag_embed(variances))

    log_probs = policy.log_prob(action)
    entropy = policy.entropy()
    return log_probs, torch.squeeze(value), entropy
def evaluate(self, observation, action):
    """Score ``action`` under the current policy and estimate the observation's value.

    The actor output is the mean of a multivariate normal; the covariance is
    diagonal, built from ``self.agent_action`` expanded to the mean's shape
    and moved to the module-level ``device``.

    :return: ``(log_prob, squeezed_critic_value, entropy)``.
    """
    mean = self.actor(observation)
    variances = self.agent_action.expand_as(mean)
    covariance = torch.diag_embed(variances).to(device)
    policy = MultivariateNormal(mean, covariance)

    log_probs = policy.log_prob(action)
    entropy = policy.entropy()
    value = self.critic(observation)
    return log_probs, torch.squeeze(value), entropy
def act(self, state, memory):
    """Sample an action for ``state`` and log the step into ``memory``.

    The policy is a multivariate normal centred on ``self.actor(state)`` with
    covariance ``diag(self.action_var)`` on the module-level ``device``.

    :param memory: Object with ``states``, ``actions`` and ``logprobs`` lists.
    :return: The sampled action, detached.
    """
    mean = self.actor(state)
    covariance = torch.diag(self.action_var).to(device)
    policy = MultivariateNormal(mean, covariance)

    sampled = policy.sample()
    sampled_logprob = policy.log_prob(sampled)

    for bucket, item in (
        (memory.states, state),
        (memory.actions, sampled),
        (memory.logprobs, sampled_logprob),
    ):
        bucket.append(item)

    return sampled.detach()
def _log_prob(self, s, a, old=False): # calculate the log probability if old: with torch.no_grad(): mean, std = self.actor_old(s) else: mean, std = self.actor(s) std = torch.stack([std] * mean.shape[0], dim=0) cov = torch.diag_embed(std) dist = MultivariateNormal(loc=mean, covariance_matrix=cov) log_prob = dist.log_prob(a).unsqueeze(dim=-1) return log_prob
def evaluate(self, states, actions):
    """Score ``actions`` under the current policy.

    The policy is a multivariate normal centred on ``self.agent(states)``.
    Its covariance is diagonal with every entry equal to ``self.sigma``
    (a vector of length ``action_dim`` expanded to the mean's shape), moved
    to the module-level ``device``.

    :return: ``(log_prob, entropy)``.
    """
    means = self.agent(states)
    variances = torch.full((action_dim, ), self.sigma).expand_as(means)
    covariance = torch.diag_embed(variances).to(device)
    policy = MultivariateNormal(means, covariance)

    log_probs = policy.log_prob(actions)
    entropy = policy.entropy()
    return log_probs, entropy
def mvnpdf_log(x, mu=None, sigma=None) -> torch.Tensor:
    """
    Log-density of ``x`` under a multivariate normal distribution.

    :param x: [batch, ndim]
    :param mu: [batch, ndim] (or [ndim]); defaults to a zero vector whose
        length matches the last dimension of ``x``
    :param sigma: [batch, ndim, ndim] (or [ndim, ndim]); defaults to the
        identity matrix sized from the last dimension of ``mu``
    :return: log_prob [batch]
    """
    if mu is None:
        # Size the default mean from x itself; a fixed length-1 mean only
        # worked for one-dimensional events.
        dtype = x.dtype if x.is_floating_point() else torch.get_default_dtype()
        mu = torch.zeros(x.shape[-1], dtype=dtype, device=x.device)
    if sigma is None:
        # Use the event dimension (last axis), not len(mu), which would be
        # the batch size for a batched mean.
        sigma = torch.eye(mu.shape[-1], dtype=mu.dtype, device=mu.device)
    d = MultivariateNormal(loc=mu, covariance_matrix=sigma)
    return d.log_prob(x)
def act(self, state):
    """Sample an action for a NumPy ``state``.

    The state is converted to a float tensor on the module-level ``device``
    and fed to ``self.action_layer``; the output is the mean of a
    multivariate normal with covariance ``diag(self.action_var)``.

    :return: ``(action, log_prob)``; neither is detached here.
    """
    state_tensor = torch.from_numpy(state).float().to(device)
    mean = self.action_layer(state_tensor)
    covariance = torch.diag(self.action_var).to(device)

    policy = MultivariateNormal(mean, covariance)
    sampled = policy.sample()
    return sampled, policy.log_prob(sampled)
def get_action(policy_new, obs):
    """Sample an action from ``policy_new`` for ``obs``.

    The network output is the mean of a multivariate normal whose covariance
    is the module-level ``cov_mat`` (only read here, never reassigned).

    :return: ``(action, log_prob)`` -- the action as a NumPy array, the
        log-probability as a detached tensor.
    """
    policy = MultivariateNormal(policy_new(obs), cov_mat)

    sampled = policy.sample()
    sampled_log_prob = policy.log_prob(sampled)

    return sampled.detach().numpy(), sampled_log_prob.detach()
def multi_normal_log_density(x, mean, cov, wi_list=None):
    """Multivariate-normal log-density of ``x``, optionally per coordinate subset.

    With ``wi_list=None`` the full distribution ``N(mean, cov)`` is evaluated.
    Otherwise, for each mask ``wi`` the coordinates selected by
    ``np.argwhere(wi)`` are extracted from ``x``, ``mean`` and both axes of
    ``cov``, and the marginal log-density is computed; the results are
    concatenated as columns. When the module-level ``use_gpu`` flag is set,
    the sub-problems are solved on the CPU and the result is moved back with
    ``.cuda()``.
    """
    if wi_list is None:
        return MultivariateNormal(mean, cov).log_prob(x)

    columns = []
    for wi in wi_list:
        idx = np.argwhere(wi).squeeze(0)
        sub_mean = mean[:, idx]
        sub_cov = cov[:, idx, :][:, :, idx]
        sub_x = x[:, idx]
        if use_gpu:
            sub_mean, sub_cov, sub_x = sub_mean.cpu(), sub_cov.cpu(), sub_x.cpu()
        sub_dist = MultivariateNormal(sub_mean, sub_cov)
        columns.append(sub_dist.log_prob(sub_x).unsqueeze(1))

    stacked = torch.cat(columns, dim=1)
    return stacked.cuda() if use_gpu else stacked
def evaluate(self, states, action):
    """Score ``action`` for a sequence of stacked ``states``.

    The states are stacked, flattened to a 4-D batch (keeping their last
    three dimensions), passed through ``self.conv`` and reshaped to
    ``(-1, self.size)`` features shared by actor and critic. The policy is a
    multivariate normal with diagonal covariance built from
    ``self.action_var`` repeated once per flattened state.

    :return: ``(log_prob, squeezed_state_value, entropy)``, each reshaped to
        the leading dimensions of the flattened state batch.
    """
    stacked = torch.stack(states)
    states = stacked.view(-1, *stacked.shape[-3:])
    lead_shape = states.shape[:-3]

    features = self.conv(states).view(-1, self.size)

    mean = self.actor(features)
    variances = self.action_var.repeat(states.shape[0], 1)
    covariance = torch.diag_embed(variances).to(device)
    policy = MultivariateNormal(mean, covariance)

    flat_action = action.view(-1, action_size)
    log_probs = policy.log_prob(flat_action).view(lead_shape)
    entropy = policy.entropy().view(lead_shape)
    value = self.critic(features).view(lead_shape)

    return log_probs, torch.squeeze(value), entropy
def evaluate(self, state, action):
    """Return log-probability, state value and entropy for ``(state, action)``.

    The policy is a multivariate normal centred on ``self.actor(state)`` with
    a diagonal covariance built from ``self.action_var`` (expanded to the
    mean's shape, on the module-level ``device``).

    :return: ``(log_prob, squeezed_critic_value, entropy)``.
    """
    mean = self.actor(state)
    covariance = torch.diag_embed(self.action_var.expand_as(mean)).to(device)
    policy = MultivariateNormal(mean, covariance)

    log_probs = policy.log_prob(action)
    entropy = policy.entropy()
    value = torch.squeeze(self.critic(state))
    return log_probs, value, entropy
class GaussianKDE:
    """Gaussian kernel density estimator with an isotropic bandwidth."""

    def __init__(self, X, bw, device="cuda:0"):
        """
        X : tensor (n, d)
          `n` points with `d` dimensions to which KDE will be fit
        bw : numeric or "scott"
          bandwidth for Gaussian kernel; "scott" selects Scott's rule,
          ``n ** (-1 / (d + 4))``
        device : str or torch.device
          device on which the standard-normal kernel is created
        """
        self.X = X
        # D.W. Scott, "Multivariate Density Estimation: Theory, Practice, and
        # Visualization", John Wiley & Sons, New York, Chicester, 1992.
        # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gaussian_kde.html
        if isinstance(bw, str) and bw == "scott":
            n, d = X.shape
            bw = n**(-1. / (d + 4))
        self.bw = bw
        self.dims = X.shape[-1]
        self.n = X.shape[0]
        # Standard-normal kernel; inputs are rescaled by the bandwidth before
        # being evaluated against it.
        self.mvn = MultivariateNormal(
            loc=torch.zeros(self.dims).to(device),
            covariance_matrix=torch.eye(self.dims).to(device))

    def score_samples(self, Y, X=None):
        """Returns the kernel density estimates of each point in `Y`.

        Parameters
        ----------
        Y : tensor (m, d)
          `m` points with `d` dimensions for which the probability density will
          be calculated
        X : tensor (n, d), optional
          `n` points with `d` dimensions to which KDE will be fit. Provided to
          allow batch calculations in `log_prob`. By default, `X` is None and
          all points used to initialize GaussianKDE are included. The result
          is always normalised by the number of points given at construction.

        Returns
        -------
        log_probs : tensor (m)
          log probability densities for each of the queried points in `Y`
        """
        if X is None:
            X = self.X

        # (n_x, m): log kernel value for every (fit point, query point) pair.
        kernel_log = self.mvn.log_prob((X.unsqueeze(1) - Y) / self.bw)

        # log(bw**dims * n), computed in log space.
        log_norm = (
            self.dims * torch.log(torch.as_tensor(
                self.bw, dtype=kernel_log.dtype, device=kernel_log.device))
            + torch.log(torch.as_tensor(
                self.n, dtype=kernel_log.dtype, device=kernel_log.device))
        )

        # logsumexp avoids the underflow of exp(log_prob) for queries far
        # from every fit point, which previously produced log(0) = -inf.
        return torch.logsumexp(kernel_log, dim=0) - log_norm
def forward(self, state):
    """Compute an action, its log-probability and the critic value for ``state``.

    The policy is a multivariate normal centred on ``self.actor(state)`` with
    covariance ``diag(self.action_var)`` on ``self.device``. When
    ``self.random_action`` is falsy the mean itself is used as the action;
    otherwise an action is sampled.

    :return: ``(detached_action, log_prob, value)``.
    """
    value = self.critic(state)
    mean = self.actor(state)
    covariance = torch.diag(self.action_var).to(self.device)
    policy = MultivariateNormal(mean, covariance)

    chosen = policy.sample() if self.random_action else mean
    chosen_logprob = policy.log_prob(chosen)
    return chosen.detach(), chosen_logprob, value
def evaluate(self, state, action):
    """Evaluate ``action`` under the current policy for ``state``.

    :return: ``(log_prob, squeezed_critic_value, entropy)`` where
        ``log_prob`` is the log of pi(a|s) under a multivariate normal
        centred on ``self.actor(state)``.
    """
    mean = self.actor(state)
    per_sample_var = self.action_var.expand_as(mean)

    # diag_embed places each row of variances on the diagonal of its own
    # matrix, giving one diagonal covariance per sample.
    covariance = torch.diag_embed(per_sample_var).to(device)
    policy = MultivariateNormal(mean, covariance)

    # Log-probabilities of the given actions, not action values.
    log_probs = policy.log_prob(action)
    entropy = policy.entropy()

    value = self.critic(state)
    return log_probs, torch.squeeze(value), entropy
def evaluate(self, state, action):
    """Evaluate ``action`` under the policy whose mean is ``self._action_mean(state)``.

    The covariance is diagonal, built from ``self.action_var`` expanded to
    the mean's shape and moved to the module-level ``device``.

    :return: ``(log_prob, squeezed_critic_value, entropy)``.
    """
    mean = self._action_mean(state)
    covariance = torch.diag_embed(self.action_var.expand_as(mean)).to(device)
    policy = MultivariateNormal(mean, covariance)

    log_probs = policy.log_prob(action)
    entropy = policy.entropy()

    value = torch.squeeze(self.critic(state))
    return log_probs, value, entropy
def forward(self, x, a):
    """Score (or sample) an action for input ``x``.

    ``x`` is converted to a float tensor and passed through ``self.fc1`` and
    ``self.fc2`` with tanh activations; ``self.means`` maps the result to the
    mean of a multivariate normal with covariance ``self.eye``.

    :param a: Action to score. If ``None``, an action is sampled and returned
        as a NumPy array.
    :return: ``(log_prob, a)``.
    """
    hidden = torch.tensor(x, dtype=torch.float)
    hidden = torch.tanh(self.fc1(hidden))
    hidden = torch.tanh(self.fc2(hidden))

    policy = MultivariateNormal(self.means(hidden), self.eye)

    if a is None:
        a = policy.sample().detach().numpy()

    action_tensor = torch.tensor(a, dtype=torch.float)
    return policy.log_prob(action_tensor), a
def forward(self, state):
    """Sample an action from a state-dependent Gaussian policy.

    Two ReLU layers (``self.linear1``, ``self.linear2``) feed two heads:
    ``self.mu`` (scaled to ``2 * sigmoid``) and ``self.sigma`` (ReLU plus a
    small floor). Both are diag-embedded, moved to the module-level
    ``device`` and used to build a ``MultivariateNormal``.

    :param state: Input passed to ``self.linear1``.
    :return: ``(detached_action, action_log_prob, mean_entropy)``.
    """
    hidden = F.relu(self.linear1(state))
    hidden = F.relu(self.linear2(hidden))

    # Original note said "both positive and negative"; 2 * sigmoid actually
    # lies in (0, 2).
    mu = 2 * torch.sigmoid(self.mu(hidden))
    # The floor keeps the diagonal strictly positive.
    sigma = F.relu(self.sigma(hidden)) + 0.001

    # Removed: `output = F.softmax(output, dim=-1)` followed by
    # `action_mean = self.linear3(output)`. `output` was read before it was
    # ever assigned (UnboundLocalError on every call) and `action_mean` was
    # never used.

    # NOTE(review): the mean is diag-embedded as well, so it becomes a matrix
    # whose rows act as a batch of means -- confirm this is intended rather
    # than passing `mu` unchanged.
    mu = torch.diag_embed(mu).to(device)
    sigma = torch.diag_embed(sigma).to(device)  # vector -> diagonal matrix

    dist = MultivariateNormal(mu, sigma)

    entropy = dist.entropy().mean()
    action = dist.sample()
    action_logprob = dist.log_prob(action)
    return action.detach(), action_logprob, entropy
def evaluate_true(self, X: Tensor) -> Tensor:
    r"""Evaluate the negated, weighted sum of the GMM component densities at `X`."""
    centers = self.gmm_pos

    # The distribution is rebuilt on every call so that it follows the
    # current device/dtype of the component parameters.
    components = MultivariateNormal(
        loc=centers, covariance_matrix=self.gmm_covar
    )

    # Insert singleton component dimensions into X, then broadcast it
    # against every component location.
    batch_shape = X.shape[:-1]
    singleton_dims = torch.Size([1] * (centers.ndim - 1))
    X_view = X.view(batch_shape + singleton_dims + centers.shape[-1:])
    X_bcast = X_view.expand(batch_shape + centers.shape)

    density = components.log_prob(X_bcast).exp()

    # Negated so that the problem is a minimization by default.
    weighted = self.gmm_norm * density
    return -weighted.sum(dim=-1)
def get_next_state(self, state, action):
    """Sample the next-state component and assemble the full next state.

    ``state`` and ``action`` are concatenated and passed through
    ``self.hidden_arr[3]`` then ``self.policy_arr[3]`` to get the mean of a
    multivariate normal. ``self.next_state_std`` is exponentiated and used as
    the diagonal of the covariance matrix.

    :return: ``(next_full_state, log_prob)`` where ``next_full_state`` is the
        concatenation of ``action[..., :-6]``, the shifted 6-wide state slice
        and the sampled component, and ``log_prob`` has shape ``(-1, 1)``.
    """
    joint = torch.cat((state, action), dim=-1)
    mean = self.policy_arr[3](self.hidden_arr[3](joint))

    log_scale = self.next_state_std.expand_as(mean)
    scale = torch.exp(log_scale)
    # NOTE(review): exp(log-std) is placed on the covariance diagonal, i.e.
    # it is treated as a variance -- confirm this is intended.
    sampler = MultivariateNormal(mean, torch.diag_embed(scale))

    sampled = sampler.sample()
    sampled_log_prob = sampler.log_prob(sampled).reshape(-1, 1)

    # The 6 state entries just before the final 17 are advanced by the last
    # 6 action entries.
    shifted = state[..., -17 - 6:-17] + action[..., -6:]

    assembled = torch.cat((action[..., :-6], shifted, sampled), dim=-1)
    return assembled, sampled_log_prob
def get_training_params(self, frame, mes, action):
    """Compute log-probabilities, state values and entropies for a stored batch.

    ``frame`` and ``mes`` are sequences of tensors that are stacked and
    squeezed; ``action`` is stacked only. The policy is a multivariate normal
    centred on ``self.actor_(frame, mes)`` with a diagonal covariance built
    from ``self.action_var``. All outputs are moved to the module-level
    ``device``.

    :return: ``(action_log_prob, state_value, entropy)``.
    """
    frame_batch = torch.squeeze(torch.stack(frame))
    mes_batch = torch.squeeze(torch.stack(mes))
    action_batch = torch.stack(action)

    mean = self.actor_(frame_batch, mes_batch)
    variances = self.action_var.expand_as(mean)
    covariance = torch.diag_embed(variances).to(device)
    policy = MultivariateNormal(mean, covariance)

    log_prob = policy.log_prob(action_batch).to(device)
    entropy = policy.entropy().to(device)
    value = torch.squeeze(self.critic_(frame_batch, mes_batch)).to(device)

    return log_prob, value, entropy
def act(self, state, memory):
    """Sample an action for ``state`` and log the step into ``memory``.

    The state goes through ``self.conv`` and is reshaped to
    ``(-1, self.size)`` before ``self.actor`` produces the mean of a
    multivariate normal with covariance ``diag(self.action_var)``.

    :param memory: Object with ``states``, ``actions`` and ``logprobs`` lists.
    :return: The sampled action, detached.
    """
    features = self.conv(state).view(-1, self.size)
    policy = MultivariateNormal(
        self.actor(features), torch.diag(self.action_var)
    )

    sampled = policy.sample()
    sampled_logprob = policy.log_prob(sampled)

    # The raw state (not the conv features) is what gets stored.
    memory.states.append(state)
    memory.actions.append(sampled)
    memory.logprobs.append(sampled_logprob)

    return sampled.detach()
def get_action(self, obs, actorIndex):
    """Sample an action from one of the two actors.

    :param obs: Input passed to the selected actor network.
    :param actorIndex: ``1`` selects ``self.actor1``, ``2`` selects
        ``self.actor2``.
    :return: ``(action, log_prob)`` -- the action as a NumPy array, the
        log-probability as a detached tensor.
    :raises ValueError: If ``actorIndex`` is neither 1 nor 2. Previously the
        mean stayed ``None`` and the failure surfaced inside
        ``MultivariateNormal`` with an unrelated error.
    """
    if actorIndex == 1:
        mean = self.actor1(obs)
    elif actorIndex == 2:
        mean = self.actor2(obs)
    else:
        raise ValueError(f"actorIndex must be 1 or 2, got {actorIndex!r}")

    dist = MultivariateNormal(mean, self.cov_mat)
    action = dist.sample()
    log_prob = dist.log_prob(action)

    # .numpy() requires a CPU tensor.
    return action.detach().numpy(), log_prob.detach()
def evaluate(self, old_state, old_action):
    """Re-score stored actions under the current policy.

    The policy is a multivariate normal centred on ``self.actor(old_state)``
    with a diagonal covariance built from ``self.action_var`` on
    ``self.device``.

    :return: ``(squeezed_state_value, log_prob, entropy)`` -- note that the
        state value comes first.
    """
    mean = self.actor(old_state)
    variances = self.action_var.expand_as(mean)
    policy = MultivariateNormal(
        mean, torch.diag_embed(variances).to(self.device)
    )

    # Probability of the stored action under the *current* policy.
    log_probs = policy.log_prob(old_action)
    value = self.critic(old_state)
    entropy = policy.entropy()

    return torch.squeeze(value), log_probs, entropy
def act(self, state):
    '''Choose action according to the policy.

    ``self.forward`` supplies the action mean as its first output (the other
    two outputs are not needed here). An action is sampled from a
    multivariate normal with a diagonal covariance built from
    ``self.action_var``, clamped to ``[0, 1]``, and its log-probability is
    evaluated at the clamped value.

    Returns ``(action, log_prob)``, both detached tensors.
    '''
    action_mu, _, _ = self.forward(state)
    action_var = self.action_var.expand_as(action_mu)
    cov_mat = torch.diag_embed(action_var)
    dist = MultivariateNormal(action_mu, cov_mat)

    # Clamp with torch rather than np.clip: the NumPy call depended on
    # NumPy/torch interop, could hand back an ndarray (which has no
    # .detach()) and does not work for CUDA tensors.
    action = torch.clamp(dist.sample(), 0, 1)

    log_prob = dist.log_prob(action)
    return action.detach(), log_prob.detach()
def loss(
        self,
        oh,
        ce,
        mask,
        recon_oh,
        recon_ce_mean,
        recon_ce_log_var,
        gamma_d,
        z_mean,
        z_log_var,
        avg=True):
    """Negative log-likelihood plus KL terms.

    :param oh: Targets for the cross-entropy term (summed over dim 1).
    :param ce: Continuous targets; transposed on dims (0, 1) before scoring.
    :param mask: Weights applied to the per-element Gaussian NLL.
    :param recon_oh: Reconstructed probabilities for ``oh``.
    :param recon_ce_mean: Mean of the reconstructed Gaussian.
    :param recon_ce_log_var: Log-variance of the reconstructed Gaussian.
    :param gamma_d: Per-sample weights over the domains.
    :param z_mean: Latent mean.
    :param z_log_var: Latent log-variance.
    :param avg: If true return the batch mean, otherwise per-sample losses.
    :return: Scalar loss (``avg=True``) or a vector of per-sample losses.
    """
    # NL1 for oh: cross-entropy, with self.det guarding log(0).
    NL1 = -(oh * (recon_oh + self.det).log()).sum(1)

    # NL2 for ce: diagonal Gaussian. exp(0.5 * log_var) is the standard
    # deviation, which belongs in `scale_tril`; passing it as
    # `covariance_matrix` (as before) used the std where the variance
    # exp(log_var) was required.
    dist = MultivariateNormal(
        loc=recon_ce_mean,
        scale_tril=torch.diag_embed((0.5 * recon_ce_log_var).exp()))
    NL2 = (-dist.log_prob(ce.transpose(0, 1)).transpose(0, 1) * mask).sum(1)
    NL = NL1 + NL2

    # KLD for pi
    KLD1 = -torch.sum(
        gamma_d * torch.log(self.pi.unsqueeze(0) / gamma_d + self.det), 1)

    # KLD2 for all domains
    logvar_division = self.log_var_d.unsqueeze(0)
    var_division = torch.exp(
        z_log_var.unsqueeze(1) - self.log_var_d.unsqueeze(0))
    diff = z_mean.unsqueeze(1) - self.mean_d.unsqueeze(0)
    diff_term = diff.pow(2) / torch.exp(self.log_var_d.unsqueeze(0))
    KLD21 = torch.sum(logvar_division + var_division + diff_term, 2)
    KLD21 = 0.5 * torch.sum(gamma_d * KLD21, 1)
    KLD22 = -0.5 * torch.sum(1 + z_log_var, 1)
    KLD2 = KLD21 + KLD22
    KLD = KLD1 + KLD2

    loss = NL + KLD

    # In training mode return the averaged loss; in testing mode return the
    # individual losses.
    if avg:
        return loss.mean()
    return loss
def _pred_point(self, goal_embed, im_shape, min_std=0.03): if self._2_point is None: return point_dist = self._2_point(goal_embed[:,0]) mu = point_dist[:,:2] c1, c2, c3 = F.softplus(point_dist[:,2])[:,None], point_dist[:,3][:,None], F.softplus(point_dist[:,4])[:,None] scale_tril = torch.cat((c1 + min_std, torch.zeros_like(c2), c2, c3 + min_std), dim=1).reshape((-1, 2, 2)) mu, scale_tril = [x.unsqueeze(1).unsqueeze(1) for x in (mu, scale_tril)] point_dist = MultivariateNormal(mu, scale_tril=scale_tril) h = torch.linspace(-1, 1, im_shape[0]).reshape((1, -1, 1, 1)).repeat((1, 1, im_shape[1], 1)) w = torch.linspace(-1, 1, im_shape[1]).reshape((1, 1, -1, 1)).repeat((1, im_shape[0], 1, 1)) hw = torch.cat((h, w), 3).repeat((goal_embed.shape[0], 1, 1, 1)).to(goal_embed.device) return point_dist.log_prob(hw)
def log_prob(self, x):
    """Log-probability of ``x`` under the flow.

    ``x`` is pushed through ``layer.g`` for each layer in reverse order,
    accumulating the log-probability change each layer reports. The
    transformed value is then scored under ``self.prior``, or under a
    standard normal of dimension ``self.num_vars`` when no prior is set.
    """
    total = 0
    for flow in self.layers[::-1]:
        x, delta = flow.g(x)
        total = delta + total

    base = self.prior
    if base is None:
        dim = self.num_vars
        base = MultivariateNormal(
            torch.zeros(dim).to(x.device), torch.eye(dim).to(x.device)
        )

    total += base.log_prob(x)
    return total