Example #1
    def _log_prob_with_subsetting(self,
                                  obs: Tensor,
                                  group_idx: Selector,
                                  time_idx: Selector,
                                  measure_idx: Selector,
                                  **kwargs) -> Tensor:
        self._check_lp_sub_input(group_idx, time_idx)

        idx_3d = bmat_idx(group_idx, time_idx, measure_idx)
        idx_4d = bmat_idx(group_idx, time_idx, measure_idx, measure_idx)

        dist = MultivariateNormal(self.predictions[idx_3d], self.prediction_uncertainty[idx_4d])
        return dist.log_prob(obs[idx_3d])
Example #2
    def choose_action(self, observation, memory):
        action_mean = self.actor(observation)
        cov_mat = torch.diag(self.agent_action).to(device)

        dist = MultivariateNormal(action_mean, cov_mat)
        action = dist.sample()
        action_log_prob = dist.log_prob(action)

        memory.observations.append(observation)
        memory.actions.append(action)
        memory.log_probs.append(action_log_prob)

        return action.detach()
Example #3
    def act(self, state):
        if self.has_continuous_action_space:
            action_mean = self.actor(state)
            cov_mat = torch.diag(self.action_var).unsqueeze(dim=0)
            dist = MultivariateNormal(action_mean, cov_mat)
        else:
            action_probs = self.actor(state)
            dist = Categorical(action_probs)

        action = dist.sample()
        action_logprob = dist.log_prob(action)

        return action.detach(), action_logprob.detach()
Example #4
    def evaluate(self, state, action):
        '''Evaluate action for a given state.'''
        action_mean, _, state_value = self.forward(state)

        action_var = self.action_var.expand_as(action_mean)
        cov_mat = torch.diag_embed(action_var)

        dist = MultivariateNormal(action_mean, cov_mat)

        action_logprobs = dist.log_prob(action)
        dist_entropy = dist.entropy()

        return action_logprobs, torch.squeeze(state_value), dist_entropy
Example #5
    def evaluate(self, observation, action):
        action_mean = self.actor(observation)

        agent_action = self.agent_action.expand_as(action_mean)
        cov_mat = torch.diag_embed(agent_action).to(device)

        dist = MultivariateNormal(action_mean, cov_mat)

        action_log_probs = dist.log_prob(action)
        dist_entropy = dist.entropy()
        observation_value = self.critic(observation)

        return action_log_probs, torch.squeeze(observation_value), dist_entropy
Example #6
    def act(self, state, memory):
        action_mean = self.actor(state)
        cov_mat = torch.diag(self.action_var).to(device)

        dist = MultivariateNormal(action_mean, cov_mat)
        action = dist.sample()
        action_logprob = dist.log_prob(action)

        memory.states.append(state)
        memory.actions.append(action)
        memory.logprobs.append(action_logprob)

        return action.detach()
Example #7
    def _log_prob(self, s, a, old=False):
        # calculate the log probability
        if old:
            with torch.no_grad():
                mean, std = self.actor_old(s)
        else:
            mean, std = self.actor(s)
        std = torch.stack([std] * mean.shape[0], dim=0)

        cov = torch.diag_embed(std)
        dist = MultivariateNormal(loc=mean, covariance_matrix=cov)
        log_prob = dist.log_prob(a).unsqueeze(dim=-1)
        return log_prob
Example #8
    def evaluate(self, states, actions):
        action_means = self.agent(states)

        action_var = torch.full((action_dim, ), self.sigma)
        action_var = action_var.expand_as(action_means)
        cov_mat = torch.diag_embed(action_var).to(device)

        dist = MultivariateNormal(action_means, cov_mat)

        action_logprobs = dist.log_prob(actions)
        dist_entropy = dist.entropy()

        return action_logprobs, dist_entropy
Example #9
def mvnpdf_log(x, mu=None, sigma=None) -> torch.Tensor:
    """
    :param x: [batch, ndim]
    :param mu: [batch, ndim]
    :param sigma: [batch, ndim, ndim]
    :return: log_prob [batch]
    """
    if mu is None:
        mu = tensor([0.])
    if sigma is None:
        sigma = torch.eye(len(mu))
    d = MultivariateNormal(loc=mu, covariance_matrix=sigma)
    return d.log_prob(x)
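A quick usage sketch for mvnpdf_log above (assuming torch is imported; shapes follow the docstring):

import torch

x = torch.randn(8, 3)                    # [batch=8, ndim=3]
mu = torch.zeros(8, 3)                   # per-sample means
sigma = torch.eye(3).repeat(8, 1, 1)     # [batch, ndim, ndim] identity covariances
log_p = mvnpdf_log(x, mu, sigma)         # -> log_prob of shape [8]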
Example #10
 def act(self, state):
     state = torch.from_numpy(state).float().to(device) 
     action_probs = self.action_layer(state)
     cov_mat = torch.diag(self.action_var).to(device)
     # print(action_probs)
     dist = MultivariateNormal(action_probs, cov_mat)
     action = dist.sample()
     log_prob = dist.log_prob(action)
     # print('action',action)
     # memory.states.append(state)
     # memory.actions.append(action)
     # memory.logprobs.append(log_prob)
     return action, log_prob
Example #11
def get_action(policy_new, obs):
    global cov_mat
    mean = policy_new(obs)
    dist = MultivariateNormal(mean, cov_mat)

    # Sample an action from the distribution
    action = dist.sample()

    # Calculate the log probability for that action
    log_prob = dist.log_prob(action)

    # Return the sampled action and the log probability of that action in our distribution
    return action.detach().numpy(), log_prob.detach()
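For context, get_action relies on a module-level cov_mat; a minimal sketch of how such a fixed diagonal covariance is typically built (the action dimension and fill value below are illustrative, not taken from the source project):

import torch

act_dim = 4                              # hypothetical action dimension
cov_var = torch.full((act_dim,), 0.5)    # fixed variance per action dimension
cov_mat = torch.diag(cov_var)            # (act_dim, act_dim) diagonal covariance used by get_action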
Example #12
def multi_normal_log_density(x, mean, cov, wi_list=None):
    #import pdb; pdb.set_trace()
    if wi_list is None:
        dist = MultivariateNormal(mean, cov)
        return dist.log_prob(x)
    else:
        results = []
        for wi in wi_list:
            idx = np.argwhere(wi).squeeze(0)
            meani = mean[:, idx]
            covi = cov[:, idx, :][:, :, idx]
            xi = x[:, idx]
            if use_gpu:
                meani, covi, xi = meani.cpu(), covi.cpu(), xi.cpu()
            dist = MultivariateNormal(meani, covi)
            lp = dist.log_prob(xi)
            results.append(lp.unsqueeze(1))
        log_prob = torch.cat(results, dim=1)
        if use_gpu:
            return log_prob.cuda()
        else:
            return log_prob
Example #13
 def evaluate(self, states, action):
     states = torch.stack(states)
     states = states.view(-1, *states.shape[-3:])
     actor_critic_input = self.conv(states).view(-1, self.size)
     action_mean = self.actor(actor_critic_input)
     action_var = self.action_var.repeat(states.shape[0], 1)
     cov_mat = torch.diag_embed(action_var).to(device)
     dist = MultivariateNormal(action_mean, cov_mat)
     action = action.view(-1, action_size)
     action_logprobs = dist.log_prob(action).view(states.shape[:-3])
     dist_entropy = dist.entropy().view(states.shape[:-3])
     state_value = self.critic(actor_critic_input).view(states.shape[:-3])
     return action_logprobs, torch.squeeze(state_value), dist_entropy
Example #14
    def evaluate(self, state, action):
        action_mean = self.actor(state)

        action_var = self.action_var.expand_as(action_mean)
        cov_mat = torch.diag_embed(action_var).to(device)

        dist = MultivariateNormal(action_mean, cov_mat)

        action_logprobs = dist.log_prob(action)
        dist_entropy = dist.entropy()
        state_value = self.critic(state)

        return action_logprobs, torch.squeeze(state_value), dist_entropy
Example #15
class GaussianKDE:
    def __init__(self, X, bw, device="cuda:0"):
        """
        X : tensor (n, d)
          `n` points with `d` dimensions to which KDE will be fit
        bw : numeric
          bandwidth for Gaussian kernel
        """

        self.X = X
        # D.W. Scott, “Multivariate Density Estimation: Theory, Practice, and Visualization”,
        # John Wiley & Sons, New York, Chichester, 1992.
        # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gaussian_kde.html
        if bw == "scott":
            n, d = X.shape
            bw = n**(-1. / (d + 4))
        self.bw = bw
        self.dims = X.shape[-1]
        self.n = X.shape[0]
        self.mvn = MultivariateNormal(loc=torch.zeros(self.dims).to(device),
                                      covariance_matrix=torch.eye(
                                          self.dims).to(device))

    def score_samples(self, Y, X=None):
        """Returns the kernel density estimates of each point in `Y`.

        Parameters
        ----------
        Y : tensor (m, d)
          `m` points with `d` dimensions for which the probability density will
          be calculated
        X : tensor (n, d), optional
          `n` points with `d` dimensions to which KDE will be fit. Provided to
          allow batch calculations in `log_prob`. By default, `X` is None and
          all points used to initialize GaussianKDE are included.


        Returns
        -------
        log_probs : tensor (m)
          log probability densities for each of the queried points in `Y`
        """
        if X is None:
            X = self.X

        log_probs = torch.log(
            (self.bw**(-self.dims) *
             torch.exp(self.mvn.log_prob(
                 (X.unsqueeze(1) - Y) / self.bw))).sum(dim=0) / self.n)

        return log_probs
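A minimal usage sketch for GaussianKDE above (synthetic data, CPU device for portability):

import torch

X = torch.randn(500, 2)                          # 500 reference points in 2-D
kde = GaussianKDE(X, bw="scott", device="cpu")
Y = torch.randn(10, 2)                           # query points
log_density = kde.score_samples(Y)               # -> log densities of shape (10,)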
Example #16
    def forward(self, state):
        value = self.critic(state)
        action_mean = self.actor(state)
        cov_mat = torch.diag(self.action_var).to(self.device)
        dist = MultivariateNormal(action_mean, cov_mat)

        if not self.random_action:
            action = action_mean
        else:
            action = dist.sample()

        action_logprobs = dist.log_prob(action)

        return action.detach(), action_logprobs, value
Example #17
 def evaluate(self, state, action):
     action_mean = self.actor(state)
     
     action_var = self.action_var.expand_as(action_mean)
     #torch.diag_embed returns a 2D diagonal matrix with the tensor's elements on the main diagonal
     cov_mat = torch.diag_embed(action_var).to(device)
     
     dist = MultivariateNormal(action_mean, cov_mat)
     # log-probability of the action under the policy pi(a|s), not a value estimate
     action_logprobs = dist.log_prob(action)
     dist_entropy = dist.entropy()
     state_value = self.critic(state)
     
     return action_logprobs, torch.squeeze(state_value), dist_entropy
Example #18
    def evaluate(self, state, action):
        action_mean = self._action_mean(state)
        # action_mean = torch.squeeze(x)

        action_var = self.action_var.expand_as(action_mean) #action_log_std
        cov_mat = torch.diag_embed(action_var).to(device)
        dist = MultivariateNormal(action_mean, cov_mat)

        # action_logprobs = dist.log_prob(torch.squeeze(action))
        action_logprobs = dist.log_prob(action)
        dist_entropy = dist.entropy()
        state_value = self.critic(state)
        # import pdb; pdb.set_trace()
        return action_logprobs, torch.squeeze(state_value), dist_entropy
Example #19
    def forward(self, x, a):
        x = torch.tensor(x, dtype=torch.float)

        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))

        means = self.means(x)
        dist = MultivariateNormal(means, self.eye)

        if a is None:
            a = dist.sample().detach().numpy()

        log_prob = dist.log_prob(torch.tensor(a, dtype=torch.float))
        return log_prob, a
Example #20
 def forward(self, state):
     output_1 = F.relu(self.linear1(state))
     output_2 = F.relu(self.linear2(output_1))
     mu = 2 * torch.sigmoid(self.mu(output_2))   # mean scaled to the range (0, 2)
     sigma = F.relu(self.sigma(output_2)) + 0.001   # small constant keeps the scale strictly positive
     #cov_mat = torch.diag(self.action_var).to(device)
     mu = mu.to(device)
     sigma = torch.diag_embed(sigma).to(device)  # expand the scale vector into a diagonal covariance matrix
     dist = MultivariateNormal(mu, sigma)  # N(mu, Sigma); here sigma is predicted by the network
     #distribution = Categorical(F.softmax(output, dim=-1))
     entropy = dist.entropy().mean()
     action = dist.sample()
     action_logprob = dist.log_prob(action)
     return action.detach(), action_logprob, entropy
Example #21
 def evaluate_true(self, X: Tensor) -> Tensor:
     r"""Evaluate the GMMs."""
     # This needs to be reinstantiated because MVN apparently does not
     # have a `to` method to make it device/dtype agnostic.
     mvn = MultivariateNormal(loc=self.gmm_pos, covariance_matrix=self.gmm_covar)
     view_shape = (
         X.shape[:-1]
         + torch.Size([1] * (self.gmm_pos.ndim - 1))
         + self.gmm_pos.shape[-1:]
     )
     expand_shape = X.shape[:-1] + self.gmm_pos.shape
     pdf_X = mvn.log_prob(X.view(view_shape).expand(expand_shape)).exp()
     # Multiply by -1 to make this a minimization problem by default
     return -(self.gmm_norm * pdf_X).sum(dim=-1)
Example #22
    def get_next_state(self, state, action):
        hidden = self.hidden_arr[3](torch.cat((state, action), dim=-1))
        next_state_mean = self.policy_arr[3](hidden)
        next_state_log_std = self.next_state_std.expand_as(next_state_mean)
        next_state_std = torch.exp(next_state_log_std)
        next_state_dist = MultivariateNormal(next_state_mean,
                                             torch.diag_embed(next_state_std))
        next_state = next_state_dist.sample()
        next_state_log_prob = next_state_dist.log_prob(next_state).reshape(
            -1, 1)

        continuous_state = state[..., -17 - 6:-17] + action[..., -6:]
        return torch.cat((action[..., :-6], continuous_state, next_state),
                         dim=-1), next_state_log_prob
Example #23
    def get_training_params(self, frame, mes, action):
        frame = torch.squeeze(torch.stack(frame))
        mes = torch.squeeze(torch.stack(mes))
        action = torch.stack(action)

        mean = self.actor_(frame, mes)
        action_expanded = self.action_var.expand_as(mean)
        cov_matrix = torch.diag_embed(action_expanded).to(device)

        gauss_dist = MultivariateNormal(mean, cov_matrix)
        action_log_prob = gauss_dist.log_prob(action).to(device)
        entropy = gauss_dist.entropy().to(device)
        state_value = torch.squeeze(self.critic_(frame, mes)).to(device)
        return action_log_prob, state_value, entropy
Example #24
    def act(self, state, memory):
        state_input = self.conv(state).view(-1, self.size)
        action_mean = self.actor(state_input)
        cov_mat = torch.diag(self.action_var)

        dist = MultivariateNormal(action_mean, cov_mat)
        action = dist.sample()
        action_logprob = dist.log_prob(action)

        memory.states.append(state)
        memory.actions.append(action)
        memory.logprobs.append(action_logprob)

        return action.detach()
Example #25
File: ppo.py  Project: RCAVelez/SumoRC
    def get_action(self, obs, actorIndex):
        mean = None
        if actorIndex == 1:
            mean = self.actor1(obs)
        if actorIndex == 2:
            mean = self.actor2(obs)

        dist = MultivariateNormal(mean, self.cov_mat)

        action = dist.sample()
        log_prob = dist.log_prob(action)

        return action.detach().numpy(), log_prob.detach()  # might break for me here
Example #26
 def evaluate(self, old_state, old_action): 
     action_mean = self.actor(old_state)
     
     action_var = self.action_var.expand_as(action_mean)
     cov_mat = torch.diag_embed(action_var).to(self.device)
     dist = MultivariateNormal(action_mean, cov_mat)
     
     # probability of the old action under the new policy
     action_log_probs = dist.log_prob(old_action)
     state_value = self.critic(old_state)
     dist_entropy = dist.entropy()
     
     return torch.squeeze(state_value), action_log_probs, dist_entropy
Example #27
    def act(self, state):
        '''Choose action according to the policy.'''
        action_mu, action_sigma, state_value = self.forward(state)

        action_var = self.action_var.expand_as(action_mu)
        cov_mat = torch.diag_embed(action_var)
        dist = MultivariateNormal(action_mu, cov_mat)
        action = dist.sample()
        #print("act bef = ", action)
        action = torch.clamp(action, 0, 1)  # clamp as a tensor so log_prob and detach below still work
        #print("act aft = ", action)
        log_prob = dist.log_prob(action)

        return action.detach(), log_prob.detach()
Example #28
    def loss(
            self,
            oh,
            ce,
            mask,
            recon_oh,
            recon_ce_mean,
            recon_ce_log_var,
            gamma_d,
            z_mean,
            z_log_var,
            avg=True):
        # NL1 for oh
        NL1 = -(oh * (recon_oh + self.det).log()
                ).sum(1)  # cross entropy loss
        # NL2 for ce
        dist = MultivariateNormal(
            loc=recon_ce_mean,
            covariance_matrix=torch.diag_embed(
                recon_ce_log_var.exp().sqrt()))
        NL2 = (-dist.log_prob(ce.transpose(0, 1)).transpose(0, 1) * mask).sum(1)
        NL = NL1 + NL2

        # KLD_for pi
        KLD1 = -torch.sum(gamma_d *
                          torch.log(self.pi.unsqueeze(0) / gamma_d + self.det), 1)
        # KLD2 for all domains
        logvar_division = self.log_var_d.unsqueeze(0)
        var_division = torch.exp(
            z_log_var.unsqueeze(1) -
            self.log_var_d.unsqueeze(0))
        diff = z_mean.unsqueeze(1) - self.mean_d.unsqueeze(0)
        diff_term = diff.pow(2) / torch.exp(self.log_var_d.unsqueeze(0))
        KLD21 = torch.sum(
            logvar_division + var_division + diff_term,
            2)
        KLD21 = 0.5 * torch.sum(gamma_d * KLD21, 1)
        KLD22 = -0.5 * torch.sum(1 + z_log_var, 1)
        KLD2 = KLD21 + KLD22
        KLD = KLD1 + KLD2

        loss = NL + KLD

        # in training mode, return averaged loss. In testing mode, return
        # individual loss
        if avg:
            return loss.mean()
        else:
            return loss
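For reference, KLD21 + KLD22 above assemble the closed-form KL divergence between two diagonal Gaussians, weighted by the responsibilities gamma_d; a standalone numerical check of that identity for a single component (gamma = 1):

import torch
from torch.distributions import Normal, kl_divergence

z_mean, z_log_var = torch.randn(5), torch.randn(5)   # posterior q(z) parameters
mean_d, log_var_d = torch.randn(5), torch.randn(5)   # one prior component

# the terms as assembled in the loss above
kld21 = 0.5 * (log_var_d + torch.exp(z_log_var - log_var_d)
               + (z_mean - mean_d) ** 2 / torch.exp(log_var_d)).sum()
kld22 = -0.5 * (1 + z_log_var).sum()

# same quantity from the closed-form KL between diagonal Gaussians
q = Normal(z_mean, torch.exp(0.5 * z_log_var))
p = Normal(mean_d, torch.exp(0.5 * log_var_d))
assert torch.allclose(kld21 + kld22, kl_divergence(q, p).sum(), atol=1e-4)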
Example #29
    def _pred_point(self, goal_embed, im_shape, min_std=0.03):
        if self._2_point is None:
            return
        
        point_dist = self._2_point(goal_embed[:,0])
        mu = point_dist[:,:2]
        c1, c2, c3 = F.softplus(point_dist[:,2])[:,None], point_dist[:,3][:,None], F.softplus(point_dist[:,4])[:,None]
        scale_tril = torch.cat((c1 + min_std, torch.zeros_like(c2), c2, c3 + min_std), dim=1).reshape((-1, 2, 2))
        mu, scale_tril = [x.unsqueeze(1).unsqueeze(1) for x in (mu, scale_tril)]
        point_dist = MultivariateNormal(mu, scale_tril=scale_tril)

        h = torch.linspace(-1, 1, im_shape[0]).reshape((1, -1, 1, 1)).repeat((1, 1, im_shape[1], 1))
        w = torch.linspace(-1, 1, im_shape[1]).reshape((1, 1, -1, 1)).repeat((1, im_shape[0], 1, 1))
        hw = torch.cat((h, w), 3).repeat((goal_embed.shape[0], 1, 1, 1)).to(goal_embed.device)
        return point_dist.log_prob(hw)
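A side note on the scale_tril construction above: MultivariateNormal treats it as a lower-triangular Cholesky factor L, with covariance L @ L.T, which a quick check confirms (the numbers below are illustrative only):

import torch
from torch.distributions import MultivariateNormal

L = torch.tensor([[0.5, 0.0],     # [[c1, 0],
                  [0.1, 0.3]])    #  [c2, c3]]
mvn = MultivariateNormal(loc=torch.zeros(2), scale_tril=L)
assert torch.allclose(mvn.covariance_matrix, L @ L.T)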
Example #30
    def log_prob(self, x):
        log_prob = 0
        for layer in self.layers[::-1]:
            x, log_prob_change = layer.g(x)
            log_prob = log_prob_change + log_prob

        if self.prior is None:
            norm_prior = MultivariateNormal(torch.zeros(self.num_vars).to(x.device),
                                            torch.eye(self.num_vars).to(x.device))

            log_prob += norm_prior.log_prob(x)
        else:
            log_prob += self.prior.log_prob(x)

        return log_prob