Example #1
 def _compute_loss(self, scores, targets):
     num_train = scores.shape[0]
     # Softmax probabilities per sample, then the average cross-entropy over the batch.
     probabilities = softmax(scores)
     loss = -np.sum(np.log(probabilities[np.arange(num_train), targets])) / num_train
     # Gradient w.r.t. the scores: softmax output minus the one-hot targets, averaged over the batch.
     probabilities[np.arange(num_train), targets] -= 1
     dsoftmax = probabilities / num_train
     return loss, dsoftmax
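All of these examples call a softmax helper that is not shown on this page. A minimal, numerically stable sketch, assuming the row-wise convention used by Examples #1-#3 (one row per sample, one column per class), could look like the following; note that Examples #4 and #5 apply softmax to an array shaped (n_classes, num_samples) and normalise over classes, so the helper in that project may use a different axis:

import numpy as np

def softmax(scores):
    # Subtract the row maximum before exponentiating for numerical stability.
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    # Normalise each row so the class probabilities sum to one.
    return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)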
Example #2
    def predict(self, new_data, **kwargs):
        # make sure the new_data is shaped like the train data
        if new_data.shape[1] != 28 * 28 + 1 and new_data.shape[1] != 28 * 28:
            new_data = new_data.reshape(new_data.shape[0], 28*28)
        if self.add_bias and new_data.shape[1] != 28*28 + 1:
            new_data = np.hstack((new_data, np.ones((new_data.shape[0], 1))))

        scores = np.dot(new_data, self.W.T)
        probs = softmax(scores)     # unnecessary: softmax is monotonic per row, so it does not change the argmax
        return np.argmax(probs, axis=1)
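As the comment notes, the softmax call is redundant here: softmax is strictly increasing within each row, so it preserves the argmax. A quick self-contained check with made-up scores (not the classifier above):

import numpy as np

rng = np.random.default_rng(0)
scores = rng.standard_normal((5, 10))
# Row-wise softmax of the raw scores.
probs = np.exp(scores - scores.max(axis=1, keepdims=True))
probs /= probs.sum(axis=1, keepdims=True)
# The predicted class is identical with or without the softmax.
assert np.array_equal(np.argmax(scores, axis=1), np.argmax(probs, axis=1))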
Example #3
    def sample(self, latent_mean, latent_cov):
        num_samples = latent_mean.shape[0]
        pi_star = np.zeros((num_samples, n_classes))
        print("\t\tPerforming MCMC sampling from the posterior latent function")
        for k in range(num_samples):
            f_sampled = np.random.multivariate_normal(mean=latent_mean[k], cov=latent_cov[k], size=self.sampling_steps)
            # class posterior is a softmax
            pi_star[k, :] = np.sum(softmax(f_sampled), axis=0)
        pi_star /= float(self.sampling_steps)

        return pi_star
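The loop above is a plain Monte Carlo estimate of the predictive class probabilities: pi*_k = (1/S) * sum_s softmax(f_s), with each f_s drawn from the Gaussian over the latent function at test point k. A self-contained sketch of one such estimate (the helper name mc_class_probabilities is made up for illustration and is not part of the class above):

import numpy as np

def mc_class_probabilities(mean_k, cov_k, sampling_steps, seed=0):
    rng = np.random.default_rng(seed)
    # Draw S latent vectors f_s ~ N(mean_k, cov_k), one row per draw.
    f_sampled = rng.multivariate_normal(mean_k, cov_k, size=sampling_steps)
    # Row-wise softmax, then average over the draws.
    shifted = f_sampled - f_sampled.max(axis=1, keepdims=True)
    probs = np.exp(shifted)
    probs /= probs.sum(axis=1, keepdims=True)
    return probs.mean(axis=0)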
Example #4
 def compute_latent_mean_cov_multiclass(self, cov_matrix_train,
                                        cov_matrix_test, f_posterior):
     print("\t\tComputing latent mean and covariance")
     num_test_samples = cov_matrix_test['hetero'][0].shape[0]
     # for more details see Algorithm 3.4, p51 from Rasmussen's Gaussian Processes
     pi = softmax(f_posterior)
     E = list()
     for cls in range(n_classes):
         # compute pi and use it as Pi too
         pi_sqrt_cls = spm.diags(np.sqrt(pi[cls]), format='csc')
         # cholesky(I + D_c^(1/2) * K * D_c^(1/2))
         L = spl.cholesky((spm.identity(self.num_samples) + pi_sqrt_cls.dot(
             spm.csc_matrix(
                 cov_matrix_train[cls]).dot(pi_sqrt_cls))).toarray(),
                          lower=True)
         # E_c = D_c^(1/2) * L^T \ (L \ D_c^(1/2))
         E.append((pi_sqrt_cls.dot(
             spsl.spsolve(spm.csc_matrix(L.T),
                          spsl.spsolve(spm.csc_matrix(L),
                                       pi_sqrt_cls)))).toarray())
     E = np.asarray(E)
     # M = cholesky(sum_c E_c)
     M = spl.cholesky(np.sum(E, axis=0), lower=True)
     latent_means = np.zeros((num_test_samples, n_classes))
     latent_covs = np.zeros((num_test_samples, n_classes, n_classes))
     for cls in range(n_classes):
         latent_means[:, cls] = cov_matrix_test['hetero'][cls].dot(
             self.one_hot_targets[:, cls] - pi[cls])
         b = E[cls].dot(cov_matrix_test['hetero'][cls].T)
         c = E[cls].dot(spl.solve(M.T, spl.solve(M, b)))
         for cls_hat in range(n_classes):
             latent_covs[:, cls, cls_hat] = np.einsum(
                 'ij,ij->i', cov_matrix_test['hetero'][cls_hat], c.T)
         latent_covs[:, cls, cls] += cov_matrix_test['auto'][cls] - \
                                     np.einsum('ij,ij->i', cov_matrix_test['hetero'][cls], b.T)
     return latent_means, latent_covs
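The np.einsum('ij,ij->i', ...) calls above compute a row-wise dot product, i.e. the diagonal of A @ B.T without forming the full matrix, which is what the per-test-point variance terms of Algorithm 3.4 need. A tiny self-contained check of that identity:

import numpy as np

rng = np.random.default_rng(1)
A = rng.standard_normal((4, 6))
B = rng.standard_normal((4, 6))
# Entry i is A[i] @ B[i], i.e. diag(A @ B.T), computed without the full product.
rowwise = np.einsum('ij,ij->i', A, B)
assert np.allclose(rowwise, np.sum(A * B, axis=1))
assert np.allclose(rowwise, np.diag(A @ B.T))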
Example #5
    def approximate_multiclass(self, cov_matrix, targets, latent_init):
        print("\t\tComputing the Laplace approximation with Newton iterations")
        self.one_hot_targets = OneHotLabels(n_classes).generate_labels(targets)

        self.iter_counter = 0
        self.num_samples = targets.shape[0]

        # initialise the temporary result storage variables and the latent function
        f = latent_init.copy()

        # for more details see Algorithm 3.3, p50 from Rasmussen's Gaussian Processes
        while not self._is_converged(f):
            pi = softmax(f)
            E = list()
            z = list()
            for cls in range(n_classes):
                # compute pi and use it as Pi too
                pi_sqrt_cls = spm.diags(np.sqrt(pi[cls]), format='csc')
                # cholesky(I + D_c^(1/2) * K * D_c^(1/2))
                L = spl.cholesky(
                    (spm.identity(self.num_samples) + pi_sqrt_cls.dot(
                        spm.csc_matrix(
                            cov_matrix[cls]).dot(pi_sqrt_cls))).toarray(),
                    lower=True)
                # E_c = D_c^(1/2) * L^T \ (L \ D_c^(1/2))
                E.append((pi_sqrt_cls.dot(
                    spsl.spsolve(spm.csc_matrix(L.T),
                                 spsl.spsolve(spm.csc_matrix(L),
                                              pi_sqrt_cls)))).toarray())
                # z_c = sum_i log(L_ii)
                z.append(np.sum(np.log(np.diagonal(L))))
            E = np.asarray(E)
            # M = cholesky(sum_c E_c)
            M = spl.cholesky(np.sum(E, axis=0), lower=True)
            b = list()
            c = list()
            for cls in range(n_classes):
                # compute Pi * Pi^T * f; note that Pi * Pi^T is symmetric -> possible optimization
                PiPiTf_cls = np.zeros(self.num_samples)
                for cls_prime in range(n_classes):
                    PiPiTf_cls += pi[cls] * pi[cls_prime] * f[cls_prime]
                # b = (D - Pi * Pi^T) * f + y - pi
                b_cls = pi[cls] * f[
                    cls] - PiPiTf_cls + self.one_hot_targets[:, cls] - pi[cls]
                # c = E * K * b
                c_cls = E[cls].dot((cov_matrix[cls].dot(b_cls)))
                b.append(b_cls)
                c.append(c_cls)
            c = np.asarray(c)
            b = np.asarray(b)
            # a = b - c + E * R * M^T \ (M \ (R^T * c))
            a = (b.ravel() - c.ravel() + np.vstack(E).dot(spl.solve(M.T, spl.solve(M, np.sum(c, axis=0))))) \
                .reshape((n_classes, -1))
            # f = K * a
            for cls in range(n_classes):
                f[cls] = cov_matrix[cls].dot(a[cls])

        approx_log_marg_likelihood = -0.5 * a.ravel().dot(f.ravel()) \
                                     + self.one_hot_targets.T.ravel().dot(f.ravel()) \
                                     - np.sum(np.log(np.sum(np.exp(f), axis=0))) - np.sum(z)

        return f, approx_log_marg_likelihood
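The stopping rule self._is_converged(f) is not shown on this page. A plausible, entirely hypothetical stand-in (the original project may instead monitor the change in the approximate log marginal likelihood) would track how much f moves between Newton steps and cap the number of iterations:

import numpy as np

class ConvergenceCheck(object):
    # Hypothetical helper, not taken from the class above.
    def __init__(self, tol=1e-6, max_iter=100):
        self.tol = tol
        self.max_iter = max_iter
        self.prev_f = None
        self.iterations = 0

    def __call__(self, f):
        self.iterations += 1
        if self.prev_f is None:
            # Nothing to compare against yet: keep iterating.
            self.prev_f = f.copy()
            return False
        delta = np.max(np.abs(f - self.prev_f))
        self.prev_f = f.copy()
        # Converged when the update barely moves f, or the iteration budget is spent.
        return delta < self.tol or self.iterations >= self.max_iter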