Exemplo n.º 1
0
 def run(self, x):
     """Sample a continuous action for ``x`` and remember its log-probability.

     The mean of the action distribution is obtained by calling the module
     itself on ``x``; the covariance is ``exp(self.logstd_raw) * self.outputid``.
     The log-probability of the sampled action is appended to
     ``self.history_of_log_probs`` and only the action is returned.
     """
     observation = Variable(Tensor(x))
     mean = self(observation)
     # NOTE(review): exp(logstd_raw) is used directly as the covariance scale;
     # outputid is presumably an identity matrix -- confirm against the class.
     covariance = torch.exp(self.logstd_raw) * self.outputid
     policy = MultivariateNormal(mean, covariance)
     action = policy.sample()
     log_prob = policy.log_prob(action)
     self.history_of_log_probs.append(log_prob)
     return action
Exemplo n.º 2
0
 def run(self, x):
     """Sample a continuous action for ``x``.

     The module itself is called on ``x`` to get the mean; the covariance is
     ``exp(self.logstd_raw) * self.outputid``.

     Returns the sampled action together with its log-probability.
     """
     observation = Variable(x)
     mean = self(observation)
     # NOTE(review): outputid is presumably an identity matrix -- confirm.
     covariance = torch.exp(self.logstd_raw) * self.outputid
     policy = MultivariateNormal(mean, covariance)
     action = policy.sample()
     return action, policy.log_prob(action)
Exemplo n.º 3
0
 def run(self, x):
     """Sample an action from the state-dependent Gaussian policy.

     Calling the module on ``x`` yields a mean and a log standard deviation;
     the covariance is ``exp(2 * logstd) * self.outputid``.  The
     log-probability of the sampled action is appended to
     ``self.history_of_log_probs`` and also returned next to the action.
     """
     mean, logstd = self(Variable(x))
     covariance = torch.exp(2 * logstd) * self.outputid
     # A full multivariate normal is used although the covariance is a scaled
     # outputid; independent Gaussians might be an alternative.
     policy = MultivariateNormal(mean, covariance)
     action = policy.sample()
     log_prob = policy.log_prob(action)
     self.history_of_log_probs.append(log_prob)
     return action, log_prob
Exemplo n.º 4
0
    def recon_sketches(self, pis, mus, sigmas, rhos, qs, gamma):
        """Sample a stroke sequence for every sketch from mixture parameters.

        Input:
            pis[batch, seq_len, M]: mixture logits
            mus[batch, seq_len, M, 2]: component means
            sigmas[batch, seq_len, M, 2]: component standard deviations
            rhos[batch, seq_len, M]: component correlations
            qs[batch, seq_len, 3]: pen-state scores
            gamma: factor multiplied onto ``sigmas`` before sampling
        Output:
            strokes[batch, seq_len, 5]: sampled 2-D move followed by a 0/1
            indicator of the maximal pen-state score
        """
        n_sketches, n_steps, _ = pis.size()
        scaled_sigmas = sigmas * gamma
        all_sketches = []
        for b in range(n_sketches):
            steps = []
            for t in range(n_steps):
                # The one-hot draw doubles as a boolean mask over components.
                picked = OneHotCategorical(logits=pis[b, t]).sample() == 1
                mu = mus[b, t][picked].view(-1)
                sigma = scaled_sigmas[b, t][picked].view(-1)
                rho = rhos[b, t][picked].view(-1)
                q = qs[b, t].view(-1)

                # Variances on the diagonal, rho * sigma_x * sigma_y off it.
                off_diagonal = 1 - torch.eye(2).to(mu.device)
                cov = torch.diag(sigma * sigma) + off_diagonal * rho * torch.prod(sigma)
                cov = cov.to(device=mu.device)

                move = MultivariateNormal(mu, cov).sample().to(pis.device)  # [2]
                # Every entry equal to the maximum score is flagged with 1.0.
                best_score = q.max(dim=0, keepdim=True)[0]
                pen = (q == best_score).to(dtype=torch.float)  # [3]

                step = torch.cat([move.view(-1), pen.view(-1)], dim=0)
                steps.append(step.to(pis.device))
            all_sketches.append(torch.stack(steps))
        return torch.stack(all_sketches)
    def __init__(self, n_samples, n_centers=9, sigma=0.02):
        """Draw ``n_samples`` 2-D points from a mixture of isotropic Gaussians.

        The first centre is the origin; each of the remaining
        ``n_centers - 1`` centres is the vector ``[1, 0]`` multiplied by
        ``get_rotation`` evaluated at evenly spaced multiples of
        ``360 / (n_centers - 1)``.  Every point is assigned to a centre
        uniformly at random and sampled with covariance ``sigma**2 * I``.
        The samples are stored, grouped by centre, in ``self.data``.
        """
        super().__init__()
        self.sigma = sigma
        n_ring = n_centers - 1
        self.nus = [torch.zeros(2)]
        for k in range(n_ring):
            rotation = get_rotation(k * 360 / n_ring)
            self.nus.append(torch.tensor([1, 0] @ rotation, dtype=torch.float))

        # Uniform random class label for every requested sample.
        labels = torch.multinomial(torch.ones(n_centers),
                                   n_samples,
                                   replacement=True)

        covariance = torch.eye(2) * self.sigma**2
        chunks = []
        for k in range(n_centers):
            count = torch.sum(labels == k).item()
            if count == 0:
                # Nothing was assigned to this centre.
                continue
            component = MultivariateNormal(self.nus[k], covariance)
            chunks.append(component.sample([count]))
        self.data = torch.cat(chunks)
Exemplo n.º 6
0
    def multibivariate_sampling(self, pi, mu1, mu2, sig1, sig2, ro):
        """Draw one 2-D sample per row from a mixture of bivariate Gaussians.

        For every row a component index is drawn from ``pi`` with
        ``multinomial``; the matching entries of ``mu1``/``mu2`` (means),
        ``sig1``/``sig2`` (standard deviations) and ``ro`` (correlation) are
        gathered along dimension 1 and define the Gaussian to sample from.

        Returns a ``(mu1.shape[0], 2)`` tensor created with ``torch.zeros``.
        """
        chosen = pi.multinomial(1)
        n_rows = mu1.shape[0]

        mean_x, mean_y = mu1.gather(1, chosen), mu2.gather(1, chosen)
        std_x, std_y = sig1.gather(1, chosen), sig2.gather(1, chosen)
        corr = ro.gather(1, chosen)

        samples = torch.zeros(n_rows, 2)
        for row in range(n_rows):
            mean = torch.tensor([mean_x[row], mean_y[row]])
            cov = torch.tensor(
                [[std_x[row]**2, corr[row] * std_x[row] * std_y[row]],
                 [corr[row] * std_x[row] * std_y[row], std_y[row]**2]])
            samples[row] = MultivariateNormal(mean, cov).sample()
        return samples
 def act(self, seqs):
     """Choose ``self.batch`` entries of ``seqs`` from sampled model predictions.

     When nothing has been recorded in ``self.seen`` the choice is delegated
     to ``sample(seqs, self.batch)``.  Otherwise a ``FittedGP`` is fitted on
     the embedded keys and the values of ``self.seen``, and for each pick one
     joint draw is taken from the predictive Gaussian over all candidates;
     the not-yet-chosen candidate with the largest drawn value wins.
     """
     if not self.seen.items():
         return sample(seqs, self.batch)
     candidates = np.array(seqs)
     X, Y = map(np.array, zip(*self.seen.items()))
     gp = FittedGP(self.embed(X), Y)
     # NOTE(review): `epochs` is not defined in this method; presumably a
     # module-level name -- confirm.
     gp.fit(epochs=epochs)
     mu, cov = gp.predict_(self.embed(candidates))
     # A small diagonal term is added to the predicted covariance.
     jittered_cov = torch.tensor(cov) + 1e-4 * torch.eye(cov.shape[0])
     posterior = MultivariateNormal(torch.tensor(mu),
                                    covariance_matrix=jittered_cov)
     taken = np.array([False] * len(candidates))
     picks = []
     for _ in range(self.batch):
         draw = posterior.sample().data.numpy()
         # Push already-chosen candidates down to the minimum of this draw.
         draw[taken] = draw.min()
         winner = np.argmax(draw)
         taken[winner] = True
         picks.append(candidates[winner])
     return picks
Exemplo n.º 8
0
 def compute_goals(self, observations):
     """Sample a goal per observation and express it relative to the pen state.

     ``self.goal_decoder(observations)`` is split along dim 2 into means and
     standard deviations of size ``self.goal_dim``.  A goal is sampled from
     the diagonal Gaussian with variance ``std**2 + 0.01`` and its
     log-probability is taken before any post-processing.  The goal is then
     clamped to [-1, 1]; its first three entries are scaled by 0.1 and added
     to the pen position, the remaining entries are added to the pen rotation
     and normalised to unit length per vector.

     Args:
         observations: 3-D tensor indexed as ``[:, :, features]``; the
             ``self.pen_vars_slice`` features hold position (first three)
             followed by rotation.

     Returns:
         (goals, log_prob_goals)
     """
     goal_means, goal_stds = torch.split(self.goal_decoder(observations),
                                         self.goal_dim,
                                         dim=2)
     # Squaring the stds keeps the variance positive.  diag_embed builds one
     # diagonal covariance per leading index; multiplying by torch.eye only
     # broadcast correctly when both leading dimensions were 1.
     covariance = torch.diag_embed(goal_stds**2 + 0.01)
     m = MultivariateNormal(goal_means, covariance)
     goals = m.sample()
     log_prob_goals = m.log_prob(goals)
     goals = torch.clamp(goals, -1, 1)

     pen_state = observations[:, :, self.pen_vars_slice]
     pen_pos = pen_state[..., :3]
     pen_rot = pen_state[..., 3:]

     rel_rot_goal = goals[:, :, 3:] + pen_rot
     # Normalise each rotation vector on its own rather than dividing by the
     # norm of the whole tensor.
     rel_rot_goal = rel_rot_goal / torch.norm(rel_rot_goal, dim=2, keepdim=True)
     goals = torch.cat([goals[:, :, :3] * 0.1 + pen_pos, rel_rot_goal], dim=2)
     return goals, log_prob_goals
Exemplo n.º 9
0
class GaussianDistribution(nn.Module):
    """
    Standard Normal Likelihood

    Wraps a zero-mean, identity-covariance ``MultivariateNormal`` over the
    flattened event of shape ``size``.
    """
    def __init__(self, size, device=None):
        """
        Args:
            size: event shape; inputs are flattened to ``prod(size)`` values.
            device: device of the distribution parameters.  Defaults to
                ``'cuda'`` when CUDA is available and to ``'cpu'`` otherwise,
                so the module can also be built on CPU-only hosts.
        """
        super().__init__()
        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.size = size
        self.dim = dim = int(np.prod(size))
        # Plain attribute, not a registered buffer: it is not moved by
        # ``Module.to``.
        self.N = MultivariateNormal(torch.zeros(dim, device=device),
                                    torch.eye(dim, device=device))

    def forward(self, input, context=None):
        """Return the per-sample log-probabilities summed over the last axis.

        ``log_prob`` already yields one value per sample, so this reduces
        over the samples of the batch.
        """
        return self.log_prob(input, context).sum(-1)

    def log_prob(self, input, context=None, sum=True):
        """Log-density of each sample after flattening it to ``self.dim`` values.

        ``context`` and ``sum`` are accepted for interface compatibility and
        are not used.
        """
        return self.N.log_prob(input.view(-1, self.dim))

    def sample(self, n_samples, context=None):
        """Draw ``n_samples`` samples of shape ``size`` and their log-densities."""
        x = self.N.sample((n_samples, )).view(n_samples, *self.size)
        log_px = self.log_prob(x, context)
        return x, log_px
Exemplo n.º 10
0
def sampling(num_beads, num_trjs=1000):
    """Sampling the states of beads in steady state.

    The states are drawn from a zero-mean multivariate normal whose
    covariance is ``cov_dict[num_beads]``.

    Args:
        num_beads : Number of beads. Must be contained in ``allow_num_beads``.
        num_trjs : Number of trajectories you want. default = 1000.

    Returns:
        Sampled states from the probability density in steady state, as a
        float tensor of shape (num_trjs, num_beads).
    """
    # The message is derived from the actual whitelist so it cannot drift
    # away from the values that are really accepted.
    assert num_beads in allow_num_beads, (
        "'num_beads' must be one of {}".format(list(allow_num_beads)))

    cov = cov_dict[num_beads]

    N = MultivariateNormal(torch.zeros(num_beads),
                           torch.from_numpy(cov).float())
    positions = N.sample((num_trjs, ))

    return positions
class Normal(object):
    """Pair of correlated standard-normal vectors with known information content.

    ``X`` and ``Y`` are ``dim``-dimensional standard normals whose matching
    coordinates have correlation ``rho``.
    """
    def __init__(self, dim, rho, device):
        """
        Args:
            dim: dimensionality of X and of Y.
            rho: per-coordinate correlation, must satisfy ``|rho| <= 1``.
            device: device on which samples are generated.
        """
        assert abs(rho) <= 1, "rho must lie in [-1, 1]"
        self.dim = dim
        self.rho = rho
        self.pdf = MultivariateNormal(
            torch.zeros(dim).to(device),
            torch.eye(dim).to(device))

    def I(self):
        """Mutual information I(X; Y) in nats; infinite when ``|rho| == 1``."""
        if abs(self.rho) == 1.0:
            return float('inf')
        return - self.dim / 2 * math.log(1 - math.pow(self.rho, 2))

    def hY(self):
        """Differential entropy of Y in nats.

        Y is a ``dim``-dimensional standard normal, whose entropy is
        ``dim / 2 * log(2 * pi * e)``.
        """
        return 0.5 * self.dim * (1.0 + math.log(2 * math.pi))

    def draw_samples(self, num_samples):
        """Return ``num_samples`` paired draws ``(X, Y)``.

        ``Y = rho * X + sqrt(1 - rho**2) * eps`` with ``eps`` an independent
        standard normal, so Y is standard normal as well.
        """
        X, ep = torch.split(self.pdf.sample((2 * num_samples, )), num_samples)
        Y = self.rho * X + math.sqrt(1 - math.pow(self.rho, 2)) * ep
        return X, Y
Exemplo n.º 12
0
    def predict(self):
        """Propagate the ensemble one step and refresh the covariance estimate.

        Every member of ``self.sigmas`` is pushed through ``self.fxu`` and
        perturbed with noise drawn from a Gaussian with mean ``self.__mean``
        and covariance ``self.Q``.  ``self.P`` becomes the sum of outer
        products of the deviations from ``self.x`` divided by ``N - 1``.
        ``self.x`` and the new ``self.P`` are stored as the prior.
        """
        self.sigmas = self.fxu(self.sigmas)

        # One noise vector per ensemble member.
        mn = MultivariateNormal(loc=self.__mean, covariance_matrix=self.Q)
        self.sigmas += mn.sample((self.N, ))

        # Sum of outer products of the rows equals residuals^T @ residuals;
        # this replaces a Python loop over the deprecated torch.ger.
        residuals = self.sigmas - self.x
        self.P = residuals.T @ residuals / (self.N - 1)

        # save prior
        self.x_prior = self.x.clone()
        self.P_prior = self.P.clone()
Exemplo n.º 13
0
    def sample_full_rank(self, n_samples, mu, tril_elements, as_numpy=True):
        """Draw samples from one full-rank Gaussian posterior per batch element

        Parameters
        ----------
        n_samples : int
            how many samples to obtain
        mu : torch.Tensor of shape `[self.batch_size, self.Y_dim]`
            network prediction of the mu (mean parameter) of the BNN posterior
        tril_elements : torch.Tensor of shape `[self.batch_size, tril_len]`
            network prediction of lower-triangular matrix in the log-Cholesky
            decomposition of the precision matrix
        as_numpy : bool
            whether to move the result to the CPU and convert it to numpy

        Returns
        -------
        np.array or torch.Tensor of shape `[self.batch_size, n_samples, self.Y_dim]`
            samples after `self.unwhiten_back` has been applied

        """
        rows, cols = self.tril_idx[0], self.tril_idx[1]
        on_diagonal = torch.eye(self.Y_dim, dtype=bool)
        out = torch.zeros([self.batch_size, n_samples, self.Y_dim],
                          device=self.device)
        for b in range(self.batch_size):
            factor = torch.zeros([self.Y_dim, self.Y_dim],
                                 device=self.device,
                                 dtype=None)
            factor[rows, cols] = tril_elements[b, :]
            # The diagonal is predicted in log space; exponentiate it in place.
            log_diag = torch.diagonal(factor, offset=0, dim1=0, dim2=1)
            factor[on_diagonal] = torch.exp(log_diag)
            precision = torch.mm(factor, factor.T)  # [Y_dim, Y_dim]
            posterior = MultivariateNormal(loc=mu[b, :],
                                           precision_matrix=precision)
            out[b, :, :] = posterior.sample([n_samples])
        out = self.unwhiten_back(out)
        return out.cpu().numpy() if as_numpy else out
Exemplo n.º 14
0
 def iw_sample(self, x, k=200):
     """Importance-weighted likelihood estimate for every element of ``x``.

     For each batch element, ``k`` latent samples are drawn from the diagonal
     Gaussian given by the bottleneck's ``mu`` and ``logvar``, decoded through
     ``self.fc3`` and ``self.decoder``, and scored with
     ``self.eval_likelihood`` on CPU copies of the tensors.

     Args:
         x: batch of inputs; each element is tiled ``k`` times with
             ``repeat((k, 1, 1, 1))`` and must therefore be 3-D.
         k: number of latent samples per batch element.

     Returns:
         Stacked ``eval_likelihood`` results, one entry per batch element.
     """
     h = self.encoder(x)
     _, mu, logvar = self.bottleneck(h)
     logp_batch = []
     for idx in range(len(mu)):
         mv_normal = MultivariateNormal(mu[idx],
                                        torch.diag(torch.exp(logvar[idx])))
         # Samples live on the device of mu; no hard-coded .cuda(), so the
         # method also runs on CPU-only hosts.
         samples = mv_normal.sample((k, ))
         x_samples = self.decoder(self.fc3(samples))
         x_tiled = x[idx].unsqueeze(0).repeat((k, 1, 1, 1))
         batch_iw_result = self.eval_likelihood(x_samples.cpu(),
                                                x_tiled.cpu(),
                                                samples.cpu(),
                                                mu[idx].cpu(),
                                                logvar[idx].cpu())
         logp_batch.append(batch_iw_result)
     logp = torch.stack(logp_batch)  # log p for each element in the batch
     return logp
Exemplo n.º 15
0
def mm_gaussian(nsample, means, covars, weights):
    """Generates samples from a mixture model of gaussians

    :nsample: the number of samples
    :means: the list of means, one 1-D tensor per gaussian
    :covars: the list of covariance matrices, one per gaussian
    :weights: tensor with the weight of each gaussian; it is normalised
        internally and the caller's tensor is left untouched

    :return: the samples as a tensor of shape (nsample, nd, 1, 1), where nd
        is the dimension of the gaussians
    """
    assert len(means) == len(
        covars
    ), "Number of means or covariance matrices inconsistant with the number of gaussians"
    ngaussian = len(means)
    nd = means[0].size(0)
    # Out-of-place normalisation: do not modify the argument.
    weights = weights / weights.sum()

    # Draw nsample points from every component, then pick per index.
    samples = torch.zeros(ngaussian, nsample, nd)
    for i, (mean, covar) in enumerate(zip(means, covars)):
        multi_normal = MultivariateNormal(loc=mean, covariance_matrix=covar)
        samples[i] = multi_normal.sample((nsample, ))

    indices = np.random.permutation(nsample)  # the total range of indices
    start = 0
    # Indices left over after rounding keep the draw of the first component.
    mm_sample = samples[0]  # the mixture model for the gaussian
    for i in range(ngaussian):
        # the number of samples belonging to this component, rounded
        n = int(0.5 + weights[i] * nsample)
        stop = min(n + start, nsample)
        idx = indices[start:stop]
        mm_sample[idx] = samples[i, idx]
        start = stop
    return mm_sample.unsqueeze(2).unsqueeze(3)
Exemplo n.º 16
0
    def sample(self, jitter=1e-5):
        """ Sample the discretized GRF on the whole grid.

        Parameters
        ----------
        jitter: float
            Jitter passed to psd_safe_cholesky when factorising the
            covariance matrix.

        Returns
        -------
        Z: GeneralizedVector
            The sampled values, built with GeneralizedVector.from_list from
            the float sample, self.n_points and self.n_out.

        """
        covariance = self.covariance_mat.list
        mean = self.mean_vec.list

        # Factorise once ourselves and hand the Cholesky factor to the
        # distribution.
        # TODO: Determine if this is better than letting the distribution do
        # the Cholesky.
        chol_factor = psd_safe_cholesky(covariance, jitter=jitter)
        draw = MultivariateNormal(loc=mean, scale_tril=chol_factor).sample()

        return GeneralizedVector.from_list(draw.float(), self.n_points,
                                           self.n_out)
Exemplo n.º 17
0
    def sample(self,
               x: torch.Tensor,
               raw_action: Optional[torch.Tensor] = None,
               deterministic: bool = False) -> Tuple[torch.Tensor, ...]:
        """Sample (or evaluate) an action of the diagonal Gaussian policy.

        Args:
            x: input passed to ``self.forward``, which returns the mean and
                the log standard deviation.
            raw_action: pre-squash action to evaluate.  When ``None`` a new
                one is drawn (``rsample`` if ``self._reparameterize`` is set,
                ``sample`` otherwise).
            deterministic: when true, the returned action is
                ``tanh(dist.mean)``; log-probability and entropy are
                unaffected.

        Returns:
            ``(action, log_prob, entropy)``; the last two carry a trailing
            dimension of size 1.
        """
        mean, log_std = self.forward(x)
        # Diagonal matrix of standard deviations, i.e. the Cholesky factor
        # of the covariance.
        scale_tril = torch.diag_embed(log_std.exp())
        dist = MultivariateNormal(loc=mean, scale_tril=scale_tril)

        # Compare against None explicitly: the truth value of a tensor is
        # ambiguous for more than one element and wrong for a zero scalar.
        if raw_action is None:
            if self._reparameterize:
                raw_action = dist.rsample()
            else:
                raw_action = dist.sample()

        action = torch.tanh(raw_action) if self._squash else raw_action
        log_prob = dist.log_prob(raw_action).unsqueeze(-1)
        if self._squash:
            log_prob -= self._squash_correction(raw_action)
        entropy = dist.entropy().unsqueeze(-1)

        if deterministic:
            # NOTE(review): tanh is applied even when self._squash is False --
            # confirm this is intended.
            action = torch.tanh(dist.mean)
        return action, log_prob, entropy
Exemplo n.º 18
0
 def forward(self, observations, lps):
     """Run the recurrent goal policy over a sequence.

     ``observations`` and ``lps`` are concatenated along dim 2 and fed to
     ``self.rnn``.  Goals are sampled from a diagonal Gaussian decoded from
     the first ``self.n_hidden`` latent features; actions and values are then
     computed from the goals and the observations.

     Args:
         observations: tensor laid out as (seq_len, batch, features).
         lps: tensor concatenated to the observations along dim 2.

     Returns:
         (actions, log_prob_actions, goals, log_prob_goals, values, lp_values)
     """
     bs = observations[0].size(0)
     # Re-create the hidden state when the batch size has changed.
     if self.h[0].size(1) != bs:
         self.init_hidden(bs)
     # (seq_len, batch, input_size) -> (seq_len, batch, num_directions * hidden_size)
     latents, self.h = self.rnn(torch.cat([observations, lps], dim=2), self.h)
     full_latent_and_observation = torch.cat([latents, observations], dim=2)
     latents = latents[:, :, :self.n_hidden]
     goal_means, goal_stds = torch.split(self.goal_decoder(latents),
                                         self.goal_dim,
                                         dim=2)
     # Bound the means to (-100, 100) and the stds to (-1, 1).
     goal_means = 100 * torch.tanh(goal_means * 0.001)
     goal_stds = torch.tanh(goal_stds)
     # Squaring the stds keeps the variance positive.  diag_embed builds one
     # diagonal covariance per (step, batch) entry; multiplying by torch.eye
     # does not broadcast correctly for general sequence and batch sizes.
     m = MultivariateNormal(goal_means, torch.diag_embed(goal_stds**2 + 0.01))
     goals = m.sample()
     log_prob_goals = m.log_prob(goals)
     actions, log_prob_actions = self.predict_action(goals, observations)
     values = self.compute_value(goals, observations)
     lp_values = self.lp_decoder(full_latent_and_observation)
     return actions, log_prob_actions, goals, log_prob_goals, values, lp_values
Exemplo n.º 19
0
class BijectiveDistribution():
    """Skeleton of a distribution defined through a 2-D standard-normal base.

    Only the base distribution is implemented; the density and sampling
    methods are still placeholders.
    """
    def __init__(self):
        # The covariance must be a (2, 2) matrix; a 1-D tensor of ones is
        # rejected by MultivariateNormal.
        self.base = MultivariateNormal(torch.zeros(2, dtype=torch.float),
                                       covariance_matrix=torch.eye(
                                           2, dtype=torch.float),
                                       precision_matrix=None,
                                       scale_tril=None,
                                       validate_args=None)

    def log_p_x(self, x, phi):
        """Compute log p(x| phi)
        """
        pass

    def log_p_z(self, z, phi):
        """Compute log p(z| phi)
        """
        pass

    def sample(self, phi):
        """Sample x ~ p(x| phi)
        """
        # Unfinished: the sample shape is a placeholder and nothing is
        # returned yet.
        shape = ...  # TODO
        Z = self.base.sample(shape)
Exemplo n.º 20
0
# Number of responses.
dim = 2
my_grid = Grid(100, dim)

# Observe some data: ten sites with first coordinate 0.2 and second
# coordinate 0.1, 0.2, ..., 1.0.
S_y = torch.tensor([[0.2, k / 10] for k in range(1, 11)])
# Response index observed at each site.
L_y = torch.tensor([0, 0, 0, 0, 0, 1, 1, 0, 0, 0])
# Every observation has the value -6.
y = torch.tensor([-6] * 10)

(krig_mean_grid, krig_mean_list, krig_mean_iso,
 K_cond_list, K_cond_iso) = myGRF.krig_grid(my_grid,
                                            S_y,
                                            L_y,
                                            y,
                                            noise_std=0.05,
                                            compute_post_cov=True)

# Plot the kriging mean.
from meslas.plotting import plot_2d_slice, plot_krig_slice
plot_krig_slice(krig_mean_grid, S_y, L_y)

# Sample from the posterior.
from torch.distributions.multivariate_normal import MultivariateNormal
posterior = MultivariateNormal(loc=krig_mean_list,
                               covariance_matrix=K_cond_list)
distr = posterior
sample = distr.sample()

# Reshape to a regular grid.
# NOTE(review): n_out is not defined in this snippet -- presumably set
# earlier in the session; confirm.
grid_sample = my_grid.isotopic_vector_to_grid(sample, n_out)
# plot_2d_slice(grid_sample)
plot_krig_slice(grid_sample, S_y, L_y)
Exemplo n.º 21
0
class RGCN(Module):
    """Two-layer Gaussian graph convolutional classifier.

    A GGCL_F layer followed by a GGCL_D layer produces a mean and a variance
    for every node and class; the prediction is the log-softmax of a draw
    from that Gaussian.  The data of the last ``fit`` call and the resulting
    predictions (``self.output``) are kept on the instance.
    """

    def __init__(self, nnodes, nfeat, nhid, nclass, gamma=1.0, beta1=5e-4, beta2=5e-4, lr=0.01, dropout=0.6, device='cpu'):
        super(RGCN, self).__init__()

        # Hyper-parameters.
        self.device = device
        self.lr = lr
        self.gamma = gamma
        self.beta1 = beta1
        self.beta2 = beta2
        self.nclass = nclass
        self.nhid = nhid // 2

        # First layer turns the input features into a distribution, the
        # second maps that distribution to the classes.
        self.gc1 = GGCL_F(nfeat, nhid, dropout=dropout)
        self.gc2 = GGCL_D(nhid, nclass, dropout=dropout)
        self.dropout = dropout

        # Standard-normal noise source, one independent draw per node and class.
        noise_mean = torch.zeros(nnodes, self.nclass)
        noise_cov = torch.diag_embed(torch.ones(nnodes, self.nclass))
        self.gaussian = MultivariateNormal(noise_mean, noise_cov)

        # Filled in by fit().
        self.adj_norm1, self.adj_norm2 = None, None
        self.features, self.labels = None, None

    def forward(self):
        """Return log-probabilities for all nodes of the stored graph."""
        miu, sigma = self.gc1(self.features, self.adj_norm1, self.adj_norm2, self.gamma)
        miu, sigma = self.gc2(miu, sigma, self.adj_norm1, self.adj_norm2, self.gamma)
        noise = self.gaussian.sample().to(self.device)
        # Mean plus noise scaled by the standard deviation.
        return F.log_softmax(miu + noise * torch.sqrt(sigma + 1e-8), dim=1)

    def fit(self, features, adj, labels, idx_train, idx_val=None, train_iters=200, verbose=True):
        """Train on the given graph; uses validation tracking when ``idx_val`` is given.

        ``adj`` and ``features`` are densified with ``todense`` and converted
        with ``utils.to_tensor`` before training.
        """
        adj, features, labels = utils.to_tensor(adj.todense(), features.todense(), labels, device=self.device)

        self.features, self.labels = features, labels
        self.adj_norm1 = self._normalize_adj(adj, power=-1/2)
        self.adj_norm2 = self._normalize_adj(adj, power=-1)
        print('=== training rgcn model ===')
        self._initialize()
        if idx_val is not None:
            self._train_with_val(labels, idx_train, idx_val, train_iters, verbose)
        else:
            self._train_without_val(labels, idx_train, train_iters, verbose)

    def _optimize_once(self, optimizer, labels, idx_train, epoch, verbose):
        """Run one optimiser step on the training nodes and optionally log the loss."""
        optimizer.zero_grad()
        output = self.forward()
        loss_train = self._loss(output[idx_train], labels[idx_train])
        loss_train.backward()
        optimizer.step()
        if verbose and epoch % 10 == 0:
            print(f'Epoch {epoch}, training loss: {loss_train.item()}')

    def _train_without_val(self, labels, idx_train, train_iters, verbose=True):
        """Train for ``train_iters`` steps and store the final predictions."""
        optimizer = optim.Adam(self.parameters(), lr=self.lr)
        self.train()
        for epoch in range(train_iters):
            self._optimize_once(optimizer, labels, idx_train, epoch, verbose)

        self.eval()
        self.output = self.forward()

    def _train_with_val(self, labels, idx_train, idx_val, train_iters, verbose):
        """Train and keep the predictions of steps that improve validation loss or accuracy."""
        optimizer = optim.Adam(self.parameters(), lr=self.lr)

        best_loss_val = 100
        best_acc_val = 0

        for epoch in range(train_iters):
            self.train()
            self._optimize_once(optimizer, labels, idx_train, epoch, verbose)

            self.eval()
            output = self.forward()
            loss_val = F.nll_loss(output[idx_val], labels[idx_val])
            acc_val = utils.accuracy(output[idx_val], labels[idx_val])

            # Only the predictions are kept, not the model weights.
            if best_loss_val > loss_val:
                best_loss_val = loss_val
                self.output = output

            if acc_val > best_acc_val:
                best_acc_val = acc_val
                self.output = output

        print('=== picking the best model according to the performance on validation ===')


    def test(self, idx_test):
        """Print loss and accuracy of the stored predictions on ``idx_test``."""
        predictions = self.output
        loss_test = F.nll_loss(predictions[idx_test], self.labels[idx_test])
        acc_test = utils.accuracy(predictions[idx_test], self.labels[idx_test])
        print("Test set results:",
              f"loss= {loss_test.item():.4f}",
              f"accuracy= {acc_test.item():.4f}")

    def _loss(self, input, labels):
        """NLL loss plus a weighted KL term and L2 penalty on the first layer."""
        nll = F.nll_loss(input, labels)
        mean1 = self.gc1.miu
        var1 = self.gc1.sigma
        # Per-node KL-style term on the first layer's mean and variance,
        # summed over the nodes.
        kl = 0.5 * (mean1.pow(2) + var1 - torch.log(1e-8 + var1)).mean(1)
        kl = kl.sum()
        # Squared L2 norm of the first layer's weights.
        l2 = torch.norm(self.gc1.weight_miu, 2).pow(2) + \
             torch.norm(self.gc1.weight_sigma, 2).pow(2)

        return nll  + self.beta1 * kl + self.beta2 * l2

    def _initialize(self):
        """Reset the parameters of both layers."""
        self.gc1.reset_parameters()
        self.gc2.reset_parameters()

    def _normalize_adj(self, adj, power=-1/2):

        """Return D^power (A + I) D^power for a dense adjacency matrix A.

        D holds the row sums of A + I; infinite entries of D^power are
        replaced by zero.
        """
        with_loops = adj + torch.eye(len(adj)).to(self.device)
        scale = (with_loops.sum(1)).pow(power)
        scale[torch.isinf(scale)] = 0.
        scale = torch.diag(scale)
        return scale @ with_loops @ scale
Exemplo n.º 22
0
class GaussianDensity:
    """
    Fits a multivariate Gaussian Density to input data with methods for computing log gradient.

    Parameters
    ----------
    None.

    Attributes
    ----------
    mean_ : array-like, shape (n_features,)
        The mean of the fitted Gaussian.

    covariance_ : array-like, shape (n_features, n_features)
        the covariance of the fitted Gaussian

    density_ : torch.MultivariateNormal object,
        The MultivariateNormal object fitted to the empirical mean and covariance
    """
    def __init__(self):
        self.mean_ = None
        self.covariance_ = None
        self.density_ = None

    def fit(self, X):
        """ Fits a multivariate Gaussian Density to the empirical data, X

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The empirical data which we will fit a Gaussian to.

        Returns
        -------
        self : the fitted instance"""

        X = check_array(X)
        self.mean_ = X.mean(axis=0).astype(np.float64)
        # A small ridge on the diagonal is added to the empirical covariance.
        self.covariance_ = np.cov(X, rowvar=False) + 1e-5 * np.eye(X.shape[1])
        self.covariance_ = self.covariance_.astype(np.float64)
        self.density_ = MultivariateNormal(loc=torch.from_numpy(self.mean_),
                                           covariance_matrix=torch.from_numpy(
                                               self.covariance_))
        return self

    def sample(self, n_samples=1, random_state=None):
        """
        Samples from the fitted Gaussian.

        Note that this reseeds torch's global random number generator.

        Parameters
        ----------
        n_samples: int, optional (default=1)
            Number of samples to generate.
        random_state: int, RandomState instance, or None, optional (default=None)
            If int, then the random state is set using np.random.RandomState(int),
            if RandomState instance, then the instance is used directly, if None then a RandomState instance is
            used as if np.random() was called

        Returns
        -------
        samples: array-like, shape (n_samples, n_features)
            The random samples from the fitted Gaussian.
        """

        self._check_fitted(
            'The density must be fitted before it can be sampled')
        rng = check_random_state(random_state)
        torch.manual_seed(
            rng.randint(10000))  # sets the torch seed using the rng from numpy
        return self.density_.sample((n_samples, )).numpy()

    def conditional_sample(self,
                           x,
                           feature_idx,
                           n_samples=1,
                           random_state=None):
        """
        Computes the conditional distribution of the jth feature of the density and samples from the conditional

        Parameters
        ----------
        x: array-like, shape (n_features)
            The sample which we are going to be conditioning on. More specifically, we will be conditioning on the
            values of all features of x except the jth.
        feature_idx: int
            The index of the feature which we will compute the conditional distribution of (i.e. p(x_j | x_{-j}))
        n_samples: int, optional (default=1)
            The number of samples to draw from the conditional distribution
        random_state: int, RandomState instance, or None, optional (default=None)
            If int, then the random state is set using np.random.RandomState(int),
            if RandomState instance, then the instance is used directly, if None then a RandomState instance is
            used as if np.random() was called

        Returns
        -------
        conditional_samples: array-like, shape (n_samples,)
            The samples from the conditional distribution. Note: These are univariate samples since this conditional
            is a univariate distribution.
        """
        self._check_fitted()
        rng = check_random_state(random_state)
        conditional_mean, conditional_var = self._calculate_1d_guassian_conditional(
            x,
            feature_idx,
            joint_mean=self.mean_,
            joint_cov=self.covariance_,
            random_state=rng)
        # rng.normal expects the standard deviation, not the variance.
        conditional_std = np.sqrt(conditional_var.item())
        conditional_samples = rng.normal(loc=conditional_mean.item(),
                                         scale=conditional_std,
                                         size=n_samples)
        return conditional_samples

    def gradient_log_prob(self, X):
        """
        Computes the gradient of the log probability of the provided samples under the fitted Gaussian density

        Parameters
        ----------
        X: array-like (n_samples, n_features)
            The samples for which to compute the log-probability.

        Returns
        -------
        gradient-log-probability: array-like (n_samples, n_features)
            The gradient of the log probability of each sample"""

        self._check_fitted(
            "The density must be fitted before sample probabilities can be taken"
        )
        X = check_array(X, ensure_2d=False, dtype=np.float64)
        if X.ndim == 1:
            X = X.reshape(1, -1)
        X = torch.from_numpy(X).requires_grad_(True)
        # Each row only influences its own log-probability, so the gradient
        # of the sum gives every per-sample gradient in a single pass.
        total_log_prob = self.density_.log_prob(X).sum()
        grad_log_probs = torch.autograd.grad(total_log_prob, X)[0]
        return grad_log_probs.numpy()

    def log_prob(self, X):
        """
        Calculates the log probability of samples X under the fitted gaussian

        Parameters
        ----------
        X: array-like (n_samples, n_features)
            The samples for which to compute the log-probability.

        Returns
        -------
        log-probability: array-like (n_samples,)
            The log probability of each sample"""
        self._check_fitted()
        X = check_array(X, ensure_2d=False, dtype=np.float64)
        if X.ndim == 1:
            X = X.reshape(1, -1)
        X = torch.from_numpy(X)
        return self.density_.log_prob(X).numpy()

    @staticmethod
    def _calculate_1d_guassian_conditional(x,
                                           feature_idx,
                                           joint_mean,
                                           joint_cov,
                                           random_state=None):
        """
        Computes the mean and variance of the conditional distribution of the jth feature given all other features
        ref: https://www.math.uwaterloo.ca/~hwolkowi/matrixcookbook.pdf   page 40.

        Parameters
        ----------
        x: array-like, shape (n_features)
            The sample which we are going to be conditioning on; every entry except the jth is used.
        feature_idx: int
            The index of the feature which we will compute the conditional distribution of (i.e. p(x_j | x_{-j}))
        joint_mean: array-like, shape (n_features)
            The mean vector of the joint model
        joint_cov: array-like, shape (n_features, n_features)
            The covariance matrix of the joint model
        random_state: unused, kept for backward compatibility

        Returns
        -------
        conditional_mean: array, shape (1,)
            The mean of the univariate conditional distribution
        conditional_variance: array, shape (1, 1)
            The variance of the univariate conditional distribution
        """
        x = np.asarray(x)
        joint_cov = np.asarray(joint_cov)
        means = np.array(joint_mean).flatten()

        # mask selects the features we condition on (everything except j)
        mask = np.ones(len(x), dtype=bool)
        mask[feature_idx] = False
        x_nj = x[mask]

        # partition so that j (feature_idx) comes first, i.e.
        # \Sigma = [ [var(x_j), cov(x_j, x_{-j})], [cov(x_{-j}, x_j), cov(x_{-j}, x_{-j})] ]
        cov_11 = joint_cov[np.ix_(~mask, ~mask)]
        cov_12 = joint_cov[np.ix_(~mask, mask)]
        cov_22 = joint_cov[np.ix_(mask, mask)]
        cov_22_inv = np.linalg.inv(cov_22)

        conditional_mean = means[~mask] + cov_12 @ cov_22_inv @ (x_nj -
                                                                 means[mask])
        conditional_var = cov_11 - cov_12 @ cov_22_inv @ cov_12.T

        return conditional_mean, conditional_var

    def _check_fitted(self, error_message=None):
        """Raise ValueError unless fit() has been called; return True otherwise."""
        if self.density_ is None:
            if error_message is None:
                raise ValueError(
                    'The density has not been fitted, please fit the density and try again'
                )
            else:
                raise ValueError(error_message)
        return True
Exemplo n.º 23
0
class MPPI():
    """
    Model Predictive Path Integral control
    This implementation batch samples the trajectories and so scales well with the number of samples K.

    Implemented according to algorithm 2 in Williams et al., 2017
    'Information Theoretic MPC for Model-Based Reinforcement Learning',
    based off of https://github.com/ferreirafabio/mppi_pendulum
    """
    def __init__(self,
                 dynamics,
                 running_cost,
                 nx,
                 noise_sigma,
                 num_samples=100,
                 horizon=15,
                 device="cpu",
                 terminal_state_cost=None,
                 lambda_=1.,
                 noise_mu=None,
                 u_min=None,
                 u_max=None,
                 u_init=None,
                 U_init=None,
                 step_dependent_dynamics=False,
                 dynamics_variance=None,
                 running_cost_variance=None,
                 sample_null_action=False):
        """
        :param dynamics: function(state, action) -> next_state (K x nx) taking in batch state (K x nx) and action (K x nu)
        :param running_cost: function(state, action) -> cost (K x 1) taking in batch state and action (same as dynamics)
        :param nx: state dimension
        :param noise_sigma: (nu x nu) control noise covariance (assume v_t ~ N(u_t, noise_sigma));
            a 0-dimensional tensor is treated as the variance of a single control dimension
        :param num_samples: K, number of trajectories to sample
        :param horizon: T, length of each trajectory
        :param device: pytorch device
        :param terminal_state_cost: function(states, actions) -> cost, called once per command with the stacked
            rollouts (states is K x T x nx, actions is K x T x nu)
        :param lambda_: temperature, positive scalar where larger values will allow more exploration
        :param noise_mu: (nu) control noise mean (used to bias control samples); defaults to zero mean
        :param u_min: (nu) minimum values for each dimension of control to pass into dynamics
        :param u_max: (nu) maximum values for each dimension of control to pass into dynamics
        :param u_init: (nu) what to initialize new end of trajectory control to be; defaults to zero
        :param U_init: (T x nu) initial control sequence; defaults to noise
        :param step_dependent_dynamics: whether the passed in dynamics needs horizon step passed in (as 3rd arg)
        :param dynamics_variance: function(state) -> variance (K x nx) give variance of the state calculated from dynamics
        :param running_cost_variance: function(variance) -> cost (K x 1) cost function on the state variances
        :param sample_null_action: Whether to explicitly sample a null action (bad for starting in a local minima)
        :raises RuntimeError: if dynamics_variance is given without running_cost_variance
        """
        self.d = device
        self.dtype = noise_sigma.dtype
        self.K = num_samples  # N_SAMPLES
        self.T = horizon  # TIMESTEPS

        # dimensions of state and control
        self.nx = nx
        # value comparison: `is` on int literals only works by accident of small-int caching
        self.nu = 1 if len(noise_sigma.shape) == 0 else noise_sigma.shape[0]
        self.lambda_ = lambda_

        if noise_mu is None:
            noise_mu = torch.zeros(self.nu, dtype=self.dtype)

        if u_init is None:
            u_init = torch.zeros_like(noise_mu)

        # handle 1D edge case
        if self.nu == 1:
            noise_mu = noise_mu.view(-1)
            noise_sigma = noise_sigma.view(-1, 1)

        # bounds
        self.u_min = u_min
        self.u_max = u_max
        # make sure if any of them is specified, both are specified
        if self.u_max is not None and self.u_min is None:
            self.u_min = -self.u_max
        if self.u_min is not None and self.u_max is None:
            self.u_max = -self.u_min
        if self.u_min is not None:
            self.u_min = self.u_min.to(device=self.d)
            self.u_max = self.u_max.to(device=self.d)

        self.noise_mu = noise_mu.to(self.d)
        self.noise_sigma = noise_sigma.to(self.d)
        self.noise_sigma_inv = torch.inverse(self.noise_sigma)
        self.noise_dist = MultivariateNormal(
            self.noise_mu, covariance_matrix=self.noise_sigma)
        self.u_init = u_init.to(self.d)

        # T x nu control sequence, kept on the controller's device like every other tensor
        if U_init is None:
            self.U = self.noise_dist.sample((self.T, ))
        else:
            self.U = U_init.to(device=self.d)

        self.step_dependency = step_dependent_dynamics
        self.F = dynamics
        self.dynamics_variance = dynamics_variance
        self.running_cost = running_cost
        self.running_cost_variance = running_cost_variance
        self.terminal_state_cost = terminal_state_cost
        self.sample_null_action = sample_null_action
        self.state = None

        # sampled results from last command
        self.cost_total = None
        self.cost_total_non_zero = None
        self.omega = None
        self.states = None
        self.actions = None
        if self.dynamics_variance is not None and self.running_cost_variance is None:
            raise RuntimeError(
                "Need to give running cost for variance when giving the dynamics variance"
            )

    def _dynamics(self, state, u, t):
        """Call the user dynamics, passing the horizon step t only when step-dependent."""
        return self.F(state, u, t) if self.step_dependency else self.F(
            state, u)

    def command(self, state):
        """
        :param state: (nx) or (K x nx) current state, or samples of states (for propagating a distribution of states)
        :returns action: (nu) best action
        """
        # shift command 1 time step
        self.U = torch.roll(self.U, -1, dims=0)
        self.U[-1] = self.u_init

        if not torch.is_tensor(state):
            state = torch.tensor(state)
        self.state = state.to(dtype=self.dtype, device=self.d)

        cost_total = self._compute_total_cost_batch()

        # weight every sampled trajectory relative to the cheapest one
        beta = torch.min(cost_total)
        self.cost_total_non_zero = _ensure_non_zero(cost_total, beta,
                                                    1 / self.lambda_)

        eta = torch.sum(self.cost_total_non_zero)
        self.omega = (1. / eta) * self.cost_total_non_zero
        # nominal controls move by the omega-weighted average of the sampled noise
        for t in range(self.T):
            self.U[t] += torch.sum(self.omega.view(-1, 1) * self.noise[:, t],
                                   dim=0)
        action = self.U[0]

        return action

    def reset(self):
        """
        Clear controller state after finishing a trial
        """
        self.U = self.noise_dist.sample((self.T, ))

    def _compute_total_cost_batch(self):
        """Roll out K perturbed control sequences and return their total costs (K)."""
        # parallelize sampling across trajectories
        self.cost_total = torch.zeros(self.K, device=self.d, dtype=self.dtype)

        # allow propagation of a sample of states (ex. to carry a distribution), or to start with a single state
        if self.state.shape == (self.K, self.nx):
            state = self.state
        else:
            state = self.state.view(1, -1).repeat(self.K, 1)

        # resample noise each time we take an action
        self.noise = self.noise_dist.sample((self.K, self.T))
        # broadcast own control to noise over samples; now it's K x T x nu
        self.perturbed_action = self.U + self.noise
        if self.sample_null_action:
            self.perturbed_action[self.K - 1] = 0
        # naively bound control
        self.perturbed_action = self._bound_action(self.perturbed_action)
        # bounded noise after bounding (some got cut off, so we don't penalize that in action cost)
        self.noise = self.perturbed_action - self.U
        action_cost = self.lambda_ * self.noise @ self.noise_sigma_inv

        self.states = []
        self.actions = []
        for t in range(self.T):
            u = self.perturbed_action[:, t]
            state = self._dynamics(state, u, t)
            self.cost_total += self.running_cost(state, u)
            if self.dynamics_variance is not None:
                self.cost_total += self.running_cost_variance(
                    self.dynamics_variance(state))

            # Save total states/actions
            self.states.append(state)
            self.actions.append(u)

        # Actions is N x T x nu
        # States is N x T x nx
        self.actions = torch.stack(self.actions, dim=1)
        self.states = torch.stack(self.states, dim=1)

        # action perturbation cost
        perturbation_cost = torch.sum(self.perturbed_action * action_cost,
                                      dim=(1, 2))
        if self.terminal_state_cost:
            self.cost_total += self.terminal_state_cost(
                self.states, self.actions)
        self.cost_total += perturbation_cost
        return self.cost_total

    def _bound_action(self, action):
        """Clamp action in place to [u_min, u_max]; a no-op when no bounds were given."""
        if self.u_max is not None:
            # The slices index dim 1 in runs of nu entries; for the K x T x nu
            # tensor built in _compute_total_cost_batch the (nu) bounds
            # broadcast over the trailing control dimension, and slices past
            # the end of dim 1 are simply empty.
            for t in range(self.T):
                u = action[:, self._slice_control(t)]
                cu = torch.max(torch.min(u, self.u_max), self.u_min)
                action[:, self._slice_control(t)] = cu
        return action

    def _slice_control(self, t):
        """Return the slice [t * nu, (t + 1) * nu)."""
        return slice(t * self.nu, (t + 1) * self.nu)

    def get_rollouts(self, state, num_rollouts=1):
        """
            :param state: either (nx) vector or (num_rollouts x nx) for sampled initial states
            :param num_rollouts: Number of rollouts with same action sequence - for generating samples with stochastic
                                 dynamics
            :returns states: num_rollouts x T x nx vector of trajectories

        """
        state = state.view(-1, self.nx)
        if state.size(0) == 1:
            state = state.repeat(num_rollouts, 1)

        T = self.U.shape[0]
        states = torch.zeros((num_rollouts, T + 1, self.nx),
                             dtype=self.U.dtype,
                             device=self.U.device)
        states[:, 0] = state
        for t in range(T):
            # every rollout applies the same nominal control, so replicate the
            # single (nu) control across rollouts instead of reshaping it
            u = self.U[t].view(1, -1).repeat(num_rollouts, 1)
            states[:, t + 1] = self._dynamics(
                states[:, t].view(num_rollouts, -1), u, t)
        return states[:, 1:]
Exemplo n.º 24
0
def nce_test():
    """
    Test implementation of NCE for Gaussian

    Draws data from a zero-mean Gaussian with a random covariance, then fits an
    unnormalized Gaussian model (its scale_tril and normalizing constant) with
    the NCE loss. The noise distribution is a zero-mean Gaussian whose
    covariance starts as the identity and is reset after every optimizer step
    to the model's current scale_tril @ scale_tril^T. Progress is printed on
    every batch and the function ends in a debugger breakpoint.
    """
    # specify data size
    data_dim = 5
    Td = 100000
    noise_ratio = 50
    Td_batch = 1000
    Tn_batch = Td_batch * noise_ratio

    # create Pd and create artificial data
    cov_base = th.tensor(make_spd_matrix(data_dim), dtype=th.float)
    tril_mat = th.tril(cov_base)
    cov_mat = th.matmul(tril_mat, tril_mat.t())
    # log normalizing constant of the data Gaussian, printed as the target for the model's estimate
    true_c = -0.5 * th.log(th.abs(th.det(cov_mat))) - (data_dim / 2) * th.log(
        2 * th.tensor(np.pi))
    p_data = MVN(th.zeros(data_dim), scale_tril=tril_mat)
    data_labels = th.ones(Td)
    data_sample = th.utils.data.TensorDataset(p_data.sample((Td, )),
                                              data_labels)
    data_loader = th.utils.data.DataLoader(data_sample,
                                           batch_size=Td_batch,
                                           shuffle=True)

    # specify noise parameters for later use
    noise_cov_mat = th.eye(data_dim)

    # set up the model to be estimated
    cov_model = th.tensor(make_spd_matrix(data_dim), dtype=th.float)
    tril_mat_model = th.tril(cov_model)
    model = UnnormMVGaussian(th.zeros(data_dim), scale_tril=tril_mat_model)
    model.scale_tril.requires_grad = True
    model.normalizing_constant.requires_grad = True

    # set up optimization parameters
    start_epoch = 0
    end_epoch = 1000
    start_lr = 0.001
    decay_epochs = [50, 100, 250, 500, 750]
    decay_gamma = 0.1
    optimizer = th.optim.Adam([model.scale_tril, model.normalizing_constant],
                              lr=start_lr)
    lr_sched = th.optim.lr_scheduler.MultiStepLR(optimizer,
                                                 milestones=decay_epochs,
                                                 gamma=decay_gamma)

    # train
    for epoch in range(start_epoch, end_epoch):
        print(epoch)
        for data_batch, batch_labels in data_loader:
            # sample noise data for current input batch
            noise_distr = MVN(th.zeros(data_dim), noise_cov_mat)
            noise_batch = noise_distr.sample((Tn_batch, ))
            noise_labels = th.zeros(Tn_batch)
            # combine data and noise samples
            joint_batch = th.cat((data_batch, noise_batch), 0)
            joint_labels = th.cat((batch_labels, noise_labels), 0)

            # forward pass
            log_P_model = model.log_prob(joint_batch)
            log_P_noise = noise_distr.log_prob(joint_batch)
            log_P_diff = log_P_model - log_P_noise + 1e-20
            loss = NCE_loss(log_P_diff, joint_labels, Td_batch, noise_ratio)
            print(loss.item(), true_c.item(),
                  model.normalizing_constant.item())
            print(F.mse_loss(model.scale_tril, p_data.scale_tril))

            # backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # the noise covariance tracks the current model covariance estimate
            scale_tril = model.scale_tril.detach()
            noise_cov_mat = th.matmul(scale_tril, scale_tril.t())

        # Step the schedule once per epoch, after that epoch's optimizer
        # updates; stepping it first skips the first value of the schedule.
        lr_sched.step()

    # deliberate breakpoint so the fitted model can be inspected interactively
    pdb.set_trace()
    def optimize(self, f, x0, sigma0=None, sigma_reg=None):
        """
        Runs CEM optimization up to convergence or terminates at max_iterations.

        Args:
            f (func): Cost function to optimized.
            x0 (ndarray): Initial distribution mean, shape (n_dims,).
            sigma0 (ndarray): Initial distribution covariance, shape (n_dims, n_dims), defaults
                              to 3 times the identity matrix.
            sigma_reg (ndarray): Additive regularization covariance to ensure well-conditioned,
                                 shape (n_dims, n_dims), defaults to 1e-5 times the identity matrix.

        Returns:
            tuple: (x, cost, converged, iterates, sample_iterates) where x is the mean of the
                last fitted distribution (None if no iteration ran), cost is f(x) on convergence
                and sys.maxsize otherwise, converged is a bool, iterates stacks the mean after
                every iteration and sample_iterates stacks the samples drawn in every iteration.

        """
        # size the default covariances from x0 instead of assuming a 3-D problem
        n_dims = len(x0)
        if sigma0 is None:
            sigma0 = 3 * np.eye(n_dims)
        if sigma_reg is None:
            sigma_reg = 1e-5 * np.eye(n_dims)

        mu = torch.Tensor(x0)
        sigma = torch.Tensor(sigma0) + torch.Tensor(sigma_reg)
        distribution = MultivariateNormal(mu, sigma)
        converged = False
        cost = sys.maxsize
        x = None
        iterates = []
        sample_iterates = []
        for i in range(self.max_iterations):
            # Generate samples and evaluate costs to find elite set
            samples = distribution.sample((self.n_samples, )).numpy()
            sample_iterates.append(samples)

            costs = [f(s) for s in samples]
            # stable sort on cost only, so ties keep their sampling order
            ranked = sorted(zip(costs, samples), key=lambda pair: pair[0])
            elite = np.vstack([s for _, s in ranked[:self.n_elite]])

            # Re-fit the distribution based on elite set
            prev_distribution = distribution
            mu = torch.Tensor(np.mean(elite, axis=0))
            sigma = torch.Tensor(np.cov(elite.T)) + torch.Tensor(sigma_reg)
            distribution = MultivariateNormal(mu, sigma)
            # Check convergence based on KL-divergence between previous and current distributions
            kl = kl_divergence(prev_distribution, distribution).item()
            x = mu.numpy()
            iterates.append(x)

            if kl < self.kl_epsilon:
                converged = True
                cost = f(x)
                break

        if converged:
            print("\nCEM converged after {} iterations!".format(i))
            print("Solution: {}, Cost: {}\n".format(x, cost))
        else:
            print("\nCEM failed to converge after {} iterations. :(\n"
                  "".format(self.max_iterations))

        return x, cost, converged, np.array(iterates), np.array(
            sample_iterates)
    def optimize_GMM(self, f, x0, sigma0=None, sigma_reg=None):
        """
        Runs CEM optimization with Gaussian-mixture sampling distributions up to
        convergence or terminates at max_iterations.

        The first mixture (3 components) is fitted to samples of a Gaussian
        N(x0, sigma0 + sigma_reg); every iteration then re-fits a 2-component
        mixture to the elite samples.

        Args:
            f (func): Cost function to optimized.
            x0 (ndarray): Initial distribution mean, shape (n_dims,).
            sigma0 (ndarray): Initial distribution covariance, shape (n_dims, n_dims), defaults
                              to 2 times the identity matrix.
            sigma_reg (ndarray): Additive regularization covariance to ensure well-conditioned,
                                 shape (n_dims, n_dims), defaults to 1e-5 times the identity matrix.

        Returns:
            tuple: (x, cost, converged, iterates, sample_iterates) where x holds the component
                means of the last fitted mixture (None if no iteration ran), cost is the list of
                f evaluated at each of those means on convergence and sys.maxsize otherwise.

        """
        # size the default covariances from x0 instead of assuming a 3-D problem
        n_dims = len(x0)
        if sigma0 is None:
            sigma0 = 2 * np.eye(n_dims)
        if sigma_reg is None:
            sigma_reg = 1e-5 * np.eye(n_dims)

        # NOTE(review): this overwrites the instance-wide convergence threshold,
        # so it also affects later calls on the same object - confirm intended.
        self.kl_epsilon = 10

        mu = torch.Tensor(x0)
        sigma = torch.Tensor(sigma0) + torch.Tensor(sigma_reg)
        distribution = MultivariateNormal(mu, sigma)
        samples = distribution.sample((self.n_samples, )).numpy()

        GMM = mixture.GaussianMixture(n_components=3, covariance_type='full')
        GMM.fit(samples)

        converged = False
        cost = sys.maxsize
        x = None
        iterates = []
        sample_iterates = []
        for i in range(self.max_iterations):
            # Generate samples and evaluate costs to find elite set
            samples = GMM.sample(self.n_samples)[0]
            sample_iterates.append(samples)

            costs = [f(s) for s in samples]
            # stable sort on cost only, so ties keep their sampling order
            ranked = sorted(zip(costs, samples), key=lambda pair: pair[0])
            elite = np.vstack([s for _, s in ranked[:self.n_elite]])

            # Re-fit the distribution based on elite set
            prev_GMM = GMM
            GMM = mixture.GaussianMixture(n_components=2,
                                          covariance_type='full')
            GMM.fit(elite)
            # Check convergence based on KL-divergence between previous and current distributions
            kl = self.gmm_kl(prev_GMM, GMM)
            print("kl:", kl)
            x = GMM.means_
            iterates.append(x)

            if kl < self.kl_epsilon:
                converged = True
                cost = [f(s) for s in x]
                break

        if converged:
            print("\nCEM converged after {} iterations!".format(i))
            print("Solution: {}, Cost: {}\n".format(x, cost))
        else:
            print("\nCEM failed to converge after {} iterations. :(\n"
                  "".format(self.max_iterations))

        return x, cost, converged, np.array(iterates), np.array(
            sample_iterates)
Exemplo n.º 27
0
class UpperPolicy:
    r"""Upper-level policy.

    Upper-level policy \pi(w | s) implemented as a linear-Gaussian model
    parametrized by {a, A, sigma}:
            \pi(w | s) = N(w | a + As, sigma)

    Parameters
    ----------

    n_context: int
        Number of context features

    torchOut: bool, optional (default: True)
        If True the policy returns torch tensors, otherwise numpy arrays

    verbose: bool, optional (default: False)
        If True prints the policy parameters after a policy update
    """
    def __init__(self, n_context, torchOut=True, verbose=False):
        self.n_context = n_context
        self.torchOut = torchOut
        self.verbose = verbose

    def set_parameters(self, a, A, sigma):
        """Set the parameters of the upper-level policy.

        The parameters are stored as torch tensors and the sampling
        distribution is rebuilt from ``a`` and ``sigma``.

        Parameters
        ----------

        a: numpy.ndarray or torch.Tensor, shape (1, n_lower_policy_weights)
            Parameter 'a'

        A: numpy.ndarray or torch.Tensor, shape (n_context_features,
                                                n_lower_policy_weights)
            Parameter 'A'

        sigma: numpy.ndarray or torch.Tensor, shape (n_lower_policy_weights,
                                                    n_lower_policy_weights)
            Covariance matrix
        """
        n_lower_policy_weights = a.shape[1]
        assert (a.shape[0] == 1 and A.shape[1] == n_lower_policy_weights
                and A.shape[0] == self.n_context
                and sigma.shape[0] == n_lower_policy_weights and sigma.shape[1]
                == n_lower_policy_weights), "Incorrect parameter sizes"

        if isinstance(a, np.ndarray):  # Assume all same type
            self.a = torch.from_numpy(a)
            self.A = torch.from_numpy(A)
            self.sigma = torch.from_numpy(sigma)
        else:
            self.a = a
            self.A = A
            self.sigma = sigma
        self.mvnrnd = MultivariateNormal(self.a.view(-1), self.sigma)

    def sample(self, S):
        r"""Sample the upper-level policy given the context features.

        Sample distribution \pi(w | s) = N(w | a + As, sigma)

        Sampling is always done with torch.distributions; a numpy input is
        converted to a tensor first and the result is converted back to a
        numpy array when ``torchOut`` is False.

        Parameters
        ----------

        S: numpy.ndarray or torch.Tensor, shape (n_samples, n_context_features)
            Context features

        Returns
        -------

        W: numpy.ndarray or torch.Tensor, shape (n_samples,
                                                n_lower_policy_weights)
           Sampled lower-policy parameters.
        """
        if isinstance(S, np.ndarray):
            S = torch.from_numpy(S)

        W = torch.zeros(S.shape[0], self.a.shape[1], dtype=torch_type)
        mus = self.mean(S)

        # mean() hands back a numpy array when torchOut is False
        if not self.torchOut:
            mus = torch.from_numpy(mus)

        for sample in range(S.shape[0]):
            # re-centre the shared distribution on this context's mean
            self.mvnrnd.loc = mus[sample, :]
            W[sample, :] = self.mvnrnd.sample()

        if self.torchOut:
            return W
        else:
            return W.numpy()

    def mean(self, S):
        """Return the upper-level policy mean given the context features.

        The mean of the distribution N(w | a + As, sigma) is a + As.

        Parameters
        ----------

        S: numpy.ndarray or torch.Tensor, shape (n_samples, n_context_features)
            Context features

        Returns
        -------

        W: numpy.ndarray or torch.Tensor, shape (n_samples,
                                                n_lower_policy_weights)
           Distribution mean for contexts
        """
        if isinstance(S, np.ndarray):
            S = torch.from_numpy(S)

        mu = self.a + S.mm(self.A)

        if self.torchOut:
            return mu
        else:
            return mu.numpy()

    def update(self, w, F, p):
        """Update the upper-level policy parameters.

        Update is done using weighted maximum likelihood.

        Parameters
        ----------

        w: numpy.ndarray or torch.Tensor, shape (n_samples,
                                                n_lower_policy_weights)
            Lower-level policy weights

        F: numpy.ndarray or torch.Tensor, shape (n_samples, n_context_features)
            Context features

        p: torch.Tensor, shape (n_samples,)
            Sample weights
        """
        n_samples = w.shape[0]
        n_lower_policy_weights = self.a.shape[1]
        assert (w.shape[1] == n_lower_policy_weights
                and F.shape[0] == n_samples and F.shape[1] == self.n_context
                and p.shape[0] == n_samples)

        if isinstance(F, np.ndarray):
            F = torch.from_numpy(F)
        if isinstance(w, np.ndarray):
            w = torch.from_numpy(w)

        # weighted least squares on [1, F]: row 0 of the solution is a, the rest is A
        S = torch.cat((torch.ones(p.shape[0], 1, dtype=torch_type), F), 1)
        P = p.diag()
        bigA = torch.pinverse(S.t().mm(P).mm(S)).mm(S.t()).mm(P).mm(w)
        a = bigA[0, :].view(1, -1)

        # NOTE(review): deviations are taken from the offset a only (not from
        # a + F A) and the weights are not re-normalized here - confirm intended.
        wd = w - a
        sigma = (p * wd.t()).mm(wd)

        self.set_parameters(a, bigA[1:, :], sigma)

        if self.verbose:
            print('Policy update: a, A, mean of sigma')
            print(self.a)
            print(self.A)
            print(self.sigma.mean())
Exemplo n.º 28
0
        target_images.append(images[labels == 1])

    target_images = torch.cat(target_images)
    down_flat = downsample(target_images).view(len(target_images), -1)
    mean = down_flat.mean(dim=0)
    down_flat = down_flat - mean.unsqueeze(dim=0)
    cov = down_flat.t() @ down_flat / len(target_images)
    dist = MultivariateNormal(
        mean,
        covariance_matrix=cov +
        1e-4 * torch.eye(3 * DATA_SHAPE // GRAIN * DATA_SHAPE // GRAIN))
    with open(dog_dist_file, 'wb') as f:
        pickle.dump(dist, f)

# Draw low-resolution seed images from the fitted Gaussian `dist`, then load
# the classifier and choose the attack configuration.
# NOTE(review): the factor 8 is presumably the number of images per output row - confirm.
num_samples = args.nrows * 8
# each flat sample is reshaped to 3 x (DATA_SHAPE // GRAIN) x (DATA_SHAPE // GRAIN)
seeds = dist.sample((num_samples, )).view(num_samples, 3, DATA_SHAPE // GRAIN,
                                          DATA_SHAPE // GRAIN)
# Gaussian samples are unbounded; clamp them in place to [0, 1]
seeds.clamp_(0, 1)
# `upsample` is defined outside this fragment
seeds = upsample(seeds)

model = models.resnet50()
# NOTE(review): torch.load may unpickle arbitrary objects depending on the torch
# version/arguments - only load checkpoints from a trusted source.
model.load_state_dict(torch.load(args.checkpoint))
model.to(device)
model.eval()

# Only eps == 40 selects the first configuration; every other value of
# args.eps falls through to the eps = 100 configuration.
if args.eps == 40:
    attack_config = {'norm': 'L2', 'eps': 40, 'step_size': 1, 'steps': 60}
else:
    attack_config = {'norm': 'L2', 'eps': 100, 'step_size': 10, 'steps': 100}

print('attack: {}'.format(attack_config))
Exemplo n.º 29
0
class RGCN(Module):
    """Robust Graph Convolutional Networks Against Adversarial Attacks. KDD 2019.

    Parameters
    ----------
    nnodes : int
        number of nodes in the input graph
    nfeat : int
        size of input feature dimension
    nhid : int
        number of hidden units
    nclass : int
        size of output dimension
    gamma : float
        hyper-parameter for RGCN. See more details in the paper.
    beta1 : float
        hyper-parameter for RGCN. See more details in the paper.
    beta2 : float
        hyper-parameter for RGCN. See more details in the paper.
    lr : float
        learning rate for GCN
    dropout : float
        dropout rate for GCN
    device: str
        'cpu' or 'cuda'.

    """
    def __init__(self,
                 nnodes,
                 nfeat,
                 nhid,
                 nclass,
                 gamma=1.0,
                 beta1=5e-4,
                 beta2=5e-4,
                 lr=0.01,
                 dropout=0.6,
                 device='cpu'):
        super(RGCN, self).__init__()

        self.device = device
        # first turn original features to distribution
        self.lr = lr
        self.gamma = gamma
        self.beta1 = beta1
        self.beta2 = beta2
        self.nclass = nclass
        # NOTE(review): the stored value is halved, but both layers below are
        # built with the full nhid - confirm which one is intended.
        self.nhid = nhid // 2
        self.gc1 = GGCL_F(nfeat, nhid, dropout=dropout)
        self.gc2 = GGCL_D(nhid, nclass, dropout=dropout)

        self.dropout = dropout
        # standard normal noise with one independent nclass-vector per node
        self.gaussian = MultivariateNormal(
            torch.zeros(nnodes, self.nclass),
            torch.diag_embed(torch.ones(nnodes, self.nclass)))
        self.adj_norm1, self.adj_norm2 = None, None
        self.features, self.labels = None, None

    def forward(self):
        """Return per-node log-softmax outputs for the data stored by ``fit``.

        The output is sampled as miu + eps * sqrt(sigma + 1e-8) with eps drawn
        from ``self.gaussian``, so repeated calls give different results.
        """
        features = self.features
        miu, sigma = self.gc1(features, self.adj_norm1, self.adj_norm2,
                              self.gamma)
        miu, sigma = self.gc2(miu, sigma, self.adj_norm1, self.adj_norm2,
                              self.gamma)
        output = miu + self.gaussian.sample().to(
            self.device) * torch.sqrt(sigma + 1e-8)
        return F.log_softmax(output, dim=1)

    def fit(self,
            features,
            adj,
            labels,
            idx_train,
            idx_val=None,
            train_iters=200,
            verbose=True,
            **kwargs):
        """Train RGCN.

        Parameters
        ----------
        features :
            node features; must provide ``.todense()`` (e.g. a scipy sparse matrix)
        adj :
            the adjacency matrix; must provide ``.todense()`` (e.g. a scipy sparse matrix)
        labels :
            node labels
        idx_train :
            node training indices
        idx_val :
            node validation indices. If not given (None), the model is trained
            for ``train_iters`` epochs without any validation
        train_iters : int
            number of training epochs
        verbose : bool
            whether to show verbose logs
        **kwargs :
            accepted and ignored
        """

        adj, features, labels = utils.to_tensor(adj.todense(),
                                                features.todense(),
                                                labels,
                                                device=self.device)

        self.features, self.labels = features, labels
        self.adj_norm1 = self._normalize_adj(adj, power=-1 / 2)
        self.adj_norm2 = self._normalize_adj(adj, power=-1)
        print('=== training rgcn model ===')
        self._initialize()
        if idx_val is None:
            self._train_without_val(labels, idx_train, train_iters, verbose)
        else:
            self._train_with_val(labels, idx_train, idx_val, train_iters,
                                 verbose)

    def _train_without_val(self, labels, idx_train, train_iters, verbose=True):
        """Train for train_iters epochs, then store one eval-mode output in self.output."""
        optimizer = optim.Adam(self.parameters(), lr=self.lr)
        self.train()
        for i in range(train_iters):
            optimizer.zero_grad()
            output = self.forward()
            loss_train = self._loss(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()
            if verbose and i % 10 == 0:
                print('Epoch {}, training loss: {}'.format(
                    i, loss_train.item()))

        self.eval()
        output = self.forward()
        self.output = output

    def _train_with_val(self, labels, idx_train, idx_val, train_iters,
                        verbose):
        """Train for train_iters epochs, keeping in self.output the eval-mode
        output of the latest epoch that improved validation loss or accuracy.

        Only the output is kept; model weights are not rolled back.
        """
        optimizer = optim.Adam(self.parameters(), lr=self.lr)

        # start from +inf so the first epoch always records an output,
        # whatever the scale of the validation loss
        best_loss_val = float('inf')
        best_acc_val = 0

        for i in range(train_iters):
            self.train()
            optimizer.zero_grad()
            output = self.forward()
            loss_train = self._loss(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()
            if verbose and i % 10 == 0:
                print('Epoch {}, training loss: {}'.format(
                    i, loss_train.item()))

            self.eval()
            output = self.forward()
            loss_val = F.nll_loss(output[idx_val], labels[idx_val])
            acc_val = utils.accuracy(output[idx_val], labels[idx_val])

            if best_loss_val > loss_val:
                best_loss_val = loss_val
                self.output = output

            if acc_val > best_acc_val:
                best_acc_val = acc_val
                self.output = output

        print(
            '=== picking the best model according to the performance on validation ==='
        )

    def test(self, idx_test):
        """Evaluate the performance on test set

        Uses the output stored by the last call to ``fit`` and prints the
        loss and accuracy on ``idx_test``.
        """
        output = self.output
        loss_test = F.nll_loss(output[idx_test], self.labels[idx_test])
        acc_test = utils.accuracy(output[idx_test], self.labels[idx_test])
        print("Test set results:", "loss= {:.4f}".format(loss_test.item()),
              "accuracy= {:.4f}".format(acc_test.item()))

    def _loss(self, input, labels):
        """NLL loss plus beta1 * KL term on gc1's (miu, sigma) and beta2 * squared
        L2 norm of gc1's weights."""
        loss = F.nll_loss(input, labels)
        miu1 = self.gc1.miu
        sigma1 = self.gc1.sigma
        kl_loss = 0.5 * (miu1.pow(2) + sigma1 -
                         torch.log(1e-8 + sigma1)).mean(1)
        kl_loss = kl_loss.sum()
        norm2 = torch.norm(self.gc1.weight_miu, 2).pow(2) + \
                torch.norm(self.gc1.weight_sigma, 2).pow(2)

        return loss + self.beta1 * kl_loss + self.beta2 * norm2

    def _initialize(self):
        """Reset the parameters of both layers."""
        self.gc1.reset_parameters()
        self.gc2.reset_parameters()

    def _normalize_adj(self, adj, power=-1 / 2):
        """Return D^power (A + I) D^power for a dense square adjacency tensor.

        D is the diagonal degree matrix of A + I; infinite entries of
        D^power are replaced by 0.
        """
        A = adj + torch.eye(len(adj)).to(self.device)
        D_power = (A.sum(1)).pow(power)
        D_power[torch.isinf(D_power)] = 0.
        D_power = torch.diag(D_power)
        return D_power @ A @ D_power
Exemplo n.º 30
0
    def sample_loss(self, x, mu_z, logvar_z, num_samples):
        """Return a sampled, per-example loss from num_samples latent draws.

        For every latent sample z drawn from N(mu_z, Sigma) the term
            y = sum_d [x log(mu_x) + (1 - x) log(1 - mu_x)]
                + norm_dist_exp(z, 0, 1) - norm_dist_exp(z, mu_z, var_z)
        is computed with mu_x = decode(z); the result is -logsumexp of the
        terms over the samples. No 1/num_samples factor is applied.
        The terms are computed under torch.no_grad().

        NOTE(review): norm_dist_exp is presumably a Gaussian log-density (up
        to constants) - confirm against its definition.

        :param x: observed batch; the first dimension is the batch size
        :param mu_z: mean of the latent distribution
        :param logvar_z: log-variance of the latent distribution
        :param num_samples: number of latent samples to draw
        :returns: tensor with one loss value per batch element
        """
        bs = x.shape[0]
        var_z = torch.exp(logvar_z)
        Sigma = self.batch_diag(mu_z, var_z)

        dist_z = MultivariateNormal(mu_z, Sigma)
        z = [dist_z.sample() for _ in range(num_samples)]

        yi = []
        with torch.no_grad():
            for sample in z:
                mu_x = self.decode(sample)
                # Bernoulli log-likelihood of x under the decoded means
                x_exp = torch.sum(x * torch.log(mu_x) + (1 - x) * torch.log(1 - mu_x), dim=1)
                prior_mean = torch.zeros(bs, sample.shape[1]).to(device)
                prior_var = torch.ones(bs, sample.shape[1]).to(device)
                z1_exp = self.norm_dist_exp(sample, prior_mean, prior_var)
                z2_exp = self.norm_dist_exp(sample, mu_z, var_z)
                yi.append((x_exp + z1_exp - z2_exp).unsqueeze(-1))

        # one column per latent sample
        yi_tensor = torch.cat(yi, dim=1).to(device)
        # numerically stable log(sum(exp(.))) over the samples
        return -torch.logsumexp(yi_tensor, dim=1)
Exemplo n.º 31
0
# Two hand-written 6-feature inputs, evaluated by `network` as one batch.
s1 = torch.tensor([1, 2, 3, 4, 5, 6], dtype=torch.float32)
s2 = torch.tensor([0, 1, 3, 4, 5, 6], dtype=torch.float32)

new_prob = network(torch.stack([s1, s2]))

# The first `action_space.shape[0]` output columns are used as the Gaussian
# mean; the remaining columns are placed on the diagonal of the covariance
# matrix. `diag_embed` builds the batched diagonal matrices for any batch
# size and action dimension, with the dtype/device of the network output,
# instead of hand-filling flat indices 0, 4 and 8 of a fixed 2x9 buffer.
S = torch.diag_embed(new_prob[:, action_space.shape[0]:])

new_dist_buffer = MultivariateNormal(new_prob[:, :action_space.shape[0]], S)
action = new_dist_buffer.sample()

# One flattened log-probability vector per sampled action. Each action is a
# single event that `log_prob` evaluates against the whole batched
# distribution, so every entry of the list covers all batch members.
log_buffer = [new_dist_buffer.log_prob(a).reshape(-1) for a in action]

# Probabilities (not log-probabilities), detached from the autograd graph;
# one row per sampled action.
dist_buffer = torch.exp(torch.stack(log_buffer).detach())
Exemplo n.º 32
0
    def train(self, max_num_steps):
        """
        Train the policy for the given number of environment steps.

        Each iteration takes one exploratory step in ``self.env``, stores the
        transition in ``self.ReplayBuffer``, samples a minibatch and performs
        one critic update, one actor update and one target-network update.
        Every ``self.ev_n_steps`` steps the average reward is evaluated via
        ``self.getAverageReward()``; a snapshot of the best actor seen so far
        is kept in ``self.best_actor_state``.

        Note: ``tqdm``, ``args`` and ``verbose`` are not defined inside this
        method and must be resolvable from the enclosing module.

        :param max_num_steps: The number of steps to train the policy for
            (must be a positive ``int``)
        :raises AssertionError: If ``max_num_steps`` is not a positive int
        """
        # Function-scope import: only needed for snapshotting the best model.
        import copy

        assert isinstance(max_num_steps, int) and max_num_steps > 0

        gamma = self.gamma
        state = self.env.reset()
        done = False
        best_r = -np.inf

        # Training starts now
        for num_steps in tqdm(range(max_num_steps)):
            # Reset env when it reached terminal state
            if done:
                self.num_episodes += 1
                state = self.env.reset()

            # For a 1-D state this yields a single-row batch (1, state_dim).
            state_tensor = torch.from_numpy(state).float().unsqueeze(dim=1).T
            action_mean = self.actor(state_tensor).detach()

            # Exploration noise: sample from an nD Gaussian centred on the
            # actor output with fixed covariance 0.1 * I.
            m = MultivariateNormal(action_mean,
                                   torch.eye(self.action_dim) * 0.1)
            action = m.sample().detach().squeeze().numpy()

            state_next, rewd, done, _ = self.env.step(action)

            # Storing transition in buffer
            # NOTE(review): no terminal flag is stored, so the critic target
            # below bootstraps from s' on every transition, including the
            # last one of an episode -- confirm this is intended.
            self.ReplayBuffer.buffer_add((state, action, rewd, state_next))
            state = state_next

            # Sampling N points from buffer
            minibatch = self.ReplayBuffer.buffer_sample(N=self.batch_size)

            # Extracting data from minibatch
            s = torch.Tensor([el[0] for el in minibatch
                              ])  #dim (batch_size, state_dim)
            a = torch.Tensor([el[1] for el in minibatch
                              ])  #dim (batch_size, action_dim)
            r = torch.Tensor([el[2] for el in minibatch
                              ]).unsqueeze(dim=1)  #dim (batch_size, 1)
            s_prime = torch.Tensor([el[3] for el in minibatch
                                    ])  #dim (batch_size, state_dim)

            # Bootstrapped target computed from the target networks only;
            # detached so no gradient flows into them.
            a_target = self.actor_target(
                s_prime).detach()  #dim(batch_size, action_dim)
            y_i = r + gamma * self.critic_target(
                s_prime, a_target).detach()  #dim (batch_size, 1)

            # Prediction first, target second (the conventional argument
            # order); the squared error is symmetric so the value is the same.
            loss_critic = self.mse_loss(self.critic(s, a), y_i)

            # Zero gradients of both optimizers before any backward pass
            self.optimizer_critic.zero_grad()
            self.optimizer_actor.zero_grad()

            # Update critic
            loss_critic.backward()
            self.optimizer_critic.step()

            # Actor objective: maximise the critic's value of the actor's own
            # actions, expressed as a loss by negating the mean.
            a_pred = self.actor(s)
            obj_actor = -self.critic(s, a_pred).mean()

            # Update actor
            obj_actor.backward()
            self.optimizer_actor.step()

            # Update target networks
            self.update_target_networks()

            # Store losses
            self.obj_actor.append(obj_actor.item())
            self.loss_critic.append(loss_critic.item())

            # Evaluating and Printing
            if num_steps % self.ev_n_steps == 0:
                avg_reward = self.getAverageReward()
                # Saving best actor model till now
                is_best = avg_reward > best_r
                best_r = max(avg_reward, best_r)
                if is_best:
                    # state_dict() hands back references to the live tensors,
                    # which later optimizer steps modify in place. Deep-copy
                    # so the snapshot really stays the best model.
                    self.best_actor_state = {
                        'state_dict':
                        copy.deepcopy(self.actor.state_dict()),
                        'optimizer':
                        copy.deepcopy(self.optimizer_actor.state_dict()),
                        'avg_reward': avg_reward,
                        'obj_actor': obj_actor.item(),
                        'loss_critic': loss_critic.item(),
                        'seed': args.seed
                    }

                self.avg_rewards.append(avg_reward)
                if verbose:
                    print(
                        'Num steps: {0} \t Avg Reward: {1:.3f} \t Obj(Actor): {2:.3f} \t Loss(Critic): {3:.3f} \t Num eps: {4}'
                        .format(num_steps, avg_reward, obj_actor.item(),
                                loss_critic.item(), self.num_episodes))
Exemplo n.º 33
0
    mean = down_flat.mean(dim=0)
    down_flat = down_flat - mean.unsqueeze(dim=0)
    cov = down_flat.t() @ down_flat / len(imc)
    dist = MultivariateNormal(mean, covariance_matrix=cov+1e-4*ch.eye(1 * DATA_SHAPE//GRAIN * DATA_SHAPE//GRAIN))
    conditionals.append(dist)
'''
# Fit a single Gaussian to the downsampled, flattened images in `im_test`.
imc = im_test
down_flat = downsample(imc).view(len(imc), -1)
mean = down_flat.mean(dim=0)
down_flat = down_flat - mean.unsqueeze(dim=0)
cov = down_flat.t() @ down_flat / len(imc)
# Small diagonal jitter (presumably to keep the covariance positive
# definite). The identity is sized and typed from `cov` itself: the previous
# `1 * DATA_SHAPE // GRAIN * DATA_SHAPE // GRAIN` evaluates left to right and
# only equals (DATA_SHAPE // GRAIN) ** 2 when GRAIN divides DATA_SHAPE.
dist = MultivariateNormal(
    mean,
    covariance_matrix=cov +
    1e-4 * ch.eye(cov.shape[0], dtype=cov.dtype, device=cov.device))
# `dist` is rebound from the distribution to one sample drawn from it,
# reshaped to (1, DATA_SHAPE // GRAIN, DATA_SHAPE // GRAIN) as a NumPy array.
dist = dist.sample().view(1, DATA_SHAPE // GRAIN,
                          DATA_SHAPE // GRAIN).cpu().numpy()
# The very same sample array is registered four times.
conditionals.extend([dist] * 4)

# Blur the first NUM_CLASSES_VIS seeds; `conditionals` is replaced by the
# list of blurred arrays.
conditionals = [
    cv2.GaussianBlur(conditionals[i], (11, 11), 15)
    for i in range(NUM_CLASSES_VIS)
]
conditionals = [
    cv2.GaussianBlur(conditionals[i], (11, 11), 15)
    for i in range(NUM_CLASSES_VIS)