Example #1
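Both examples are taken from the horseshoe_bnn code base and assume roughly the following imports. The module paths of the repo-internal helpers (BayesianLayer, Model, compute_log_likelihoods, dataset_to_dataloader, update_mse_mae, the distribution classes, and AllMetrics) are assumptions, not confirmed by the snippets:

import math
import os

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from scipy.special import logsumexp
from torch.utils.tensorboard import SummaryWriter  # or: from tensorboardX import SummaryWriter

# Repo-internal helpers; the module paths below are assumed, not confirmed.
from horseshoe_bnn.network_layers import BayesianLayer
from horseshoe_bnn.model import Model
from horseshoe_bnn.distributions import (BinarySampleDistribution,
                                         PredictiveDistribution,
                                         SampleDistribution)
from horseshoe_bnn.metrics import AllMetrics
from horseshoe_bnn.helpers import (compute_log_likelihoods,
                                   dataset_to_dataloader,
                                   update_mse_mae)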
class LinearGaussian(GaussianBNN):
    def __init__(self, device, hyperparameters):
        # Deliberately skip GaussianBNN.__init__ (which would build the
        # two-layer network) and call nn.Module.__init__ directly, since this
        # model builds a single Bayesian layer instead.
        super(GaussianBNN, self).__init__()
        self.name = 'LinearGaussian'
        self.device = device
        self.hyperparams = hyperparameters

        # Log directory: <repo root>/models/<dataset name>/<model name>/<timestamp>
        dir_ = os.getcwd().split('horseshoe_bnn')[0]
        path = f"{dir_}models/{self.hyperparams.dataset_name}/{self.name}" \
               f"/{self.hyperparams.timestamp}"
        self.train_writer = SummaryWriter(path + '/train')
        self.test_writer = SummaryWriter(path + '/test')

        self.layer = BayesianLayer(self.hyperparams.n_features, 1, self.hyperparams, device)
        self.log_var_noise = torch.log(torch.Tensor([self.hyperparams.var_noise]))

    def forward(self, x, sample=False, n_samples=1):
        x = self.layer.forward(x, sample, n_samples)
        return x

    def log_prior(self):
        """
        Calculates the logarithm of the current
        value of the prior distribution over the weights
        """
        return self.layer.log_prior

    def log_variational_posterior(self):
        """
        Calculates the logarithm of the current value
        of the variational posterior distribution over the weights
        """
        return self.layer.log_variational_posterior
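
A minimal usage sketch (not from the source): the hyperparameters object only needs the attributes that the code above reads, although BayesianLayer may expect further fields. All names and values below are illustrative.

from types import SimpleNamespace

import torch

hyperparams = SimpleNamespace(
    dataset_name='bostonHousing',      # illustrative
    timestamp='2019-01-01_12-00-00',   # illustrative
    n_features=13,
    var_noise=0.1,
    # ... plus any fields BayesianLayer itself reads
)

device = torch.device('cpu')
model = LinearGaussian(device, hyperparams)

x = torch.randn(32, hyperparams.n_features)
out = model(x, sample=True, n_samples=5)  # exact output shape depends on BayesianLayer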
Example #2
class GaussianBNN(nn.Module, Model):
    def __init__(self, device, hyperparameters):
        super(GaussianBNN, self).__init__()
        self.name = 'GaussianBNN'
        self.device = device
        self.hyperparams = hyperparameters

        # Log directory: <repo root>/models/<dataset name>/<model name>/<timestamp>
        dir_ = os.getcwd().split('horseshoe_bnn')[0]
        path = f"{dir_}models/{self.hyperparams.dataset_name}/{self.name}" \
               f"/{self.hyperparams.timestamp}"
        self.train_writer = SummaryWriter(path + '/train')
        self.test_writer = SummaryWriter(path + '/test')

        self.l1 = BayesianLayer(self.hyperparams.n_features, self.hyperparams.n_hidden_units, self.hyperparams, device)
        self.l2 = BayesianLayer(self.hyperparams.n_hidden_units, 1, self.hyperparams, device)
        self.log_var_noise = torch.log(torch.Tensor([self.hyperparams.var_noise]))

    def initialize(self, n_features):
        """
        Reset the model parameters for a dataset with n_features input features
        """
        # Store n_features so that __init__ rebuilds the layers with the
        # correct input size.
        self.hyperparams.n_features = n_features
        self.__init__(self.device, self.hyperparams)
        return self

    def forward(self, x, sample=False, n_samples=1):
        # Forward the sample flag to the layers so that weights are actually
        # sampled when sample_elbo calls forward(..., sample=True).
        x = F.relu(self.l1.forward(x, sample, n_samples))
        x = self.l2.forward(x, sample, n_samples)
        return x

    def log_prior(self):
        """
        Calculates the logarithm of the current
        value of the prior distribution over the weights
        """
        return self.l1.log_prior \
               + self.l2.log_prior

    def log_variational_posterior(self):
        """
        Returns the log variational posterior of the weights
        sampled during the most recent forward pass
        """
        return self.l1.log_variational_posterior \
               + self.l2.log_variational_posterior

    def sample_elbo(self, input_, target, dataset_size):
        """
        Computes an estimate of the evidence lower bound using Monte Carlo sampling.

        The evidence lower bound is approximated as follows:
        Multiple samples are drawn from the variational posterior over the weights.

        For each sample:
        - The given input batch is forwarded through the resulting network
        - The current value of the prior distribution is computed
        - The current value of the variational posterior distribution is computed

        The final approximation of the lower bound is given by
        1. Averaging over the computed prior distribution values and variational posterior values
        2. Computing the value of the log likelihood
        3. Computing the value of the ELBO
        """

        batch_size = target.size()[0]
        n_samples = self.hyperparams.n_samples

        outputs = self.forward(input_, sample=True, n_samples=n_samples)
        outputs = outputs.reshape(n_samples, batch_size)
        log_prior = self.log_prior() / n_samples
        log_variational_posterior = self.log_variational_posterior() / n_samples

        var_noise = torch.exp(self.log_var_noise).to(self.device)

        log_likelihoods = compute_log_likelihoods(self.hyperparams.classification,
                                                  outputs, target, n_samples, var_noise)

        # Transpose to shape (batch_size, n_samples) so callers can average
        # over the weight samples for each data point.
        outputs = outputs.t()

        log_likelihood = log_likelihoods.mean()

        loss = (log_variational_posterior - log_prior) * batch_size / dataset_size
        loss = loss.to(self.device)

        loss -= log_likelihood

        return loss, log_prior, log_variational_posterior, log_likelihood, outputs
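
    # Estimator sketch (notation assumed, not from the source): with S = n_samples,
    # B = batch size, N = dataset size, and weight samples w_s ~ q drawn during
    # the forward pass, sample_elbo returns
    #     loss ≈ (B / N) * (1 / S) * Σ_s [log q(w_s) - log p(w_s)]
    #            - mean over s and i of log p(y_i | x_i, w_s),
    # i.e. a minibatch-scaled KL estimate minus the average log likelihood.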


    def train_model(self, dataset, epoch, optimizer, visualize_errors=False):
        """
        Trains a given model for a given number of epochs on a given dataset.
        """
        # self.train()
        super().train()

        dataset_size = dataset.features.shape[0]
        batch_size = self.hyperparams.batch_size

        # transform dataset to dataloader
        train_loader = dataset_to_dataloader(dataset, batch_size=batch_size)
        n_train_samples = len(train_loader.dataset)
        n_batches = len(train_loader)

        mse = 0
        mae = 0
        total_loss = 0
        total_log_likelihood = 0
        total_kl_divergence = 0

        for batch_idx, (data, target) in enumerate(train_loader):

            data, target = data.to(self.device), target.to(self.device)
            self.zero_grad()
            loss, log_prior, log_variational_posterior, log_likelihood, outputs = \
                self.sample_elbo(data, target, dataset_size)

            loss.backward()
            optimizer.step()

            # Given all model outputs, compute the mean output of the ensemble
            mean_output = outputs.mean(dim=1)

            if not self.hyperparams.classification:
                device_type = self.device.type
                mse, mae = update_mse_mae(mse, mae, device_type, mean_output, target)

            # Detach before accumulating so the computation graphs of past
            # batches can be freed.
            total_loss += loss.detach()
            total_log_likelihood += -log_likelihood.detach()
            total_kl_divergence += (log_variational_posterior - log_prior).detach() * target.size()[0] / dataset_size

        rmse = np.sqrt(mse / dataset_size)
        mae /= dataset_size

        if visualize_errors:
            self.train_writer.add_scalar('loss__training loss', total_loss.item(), epoch)
            self.train_writer.add_scalar('loss__kl term', total_kl_divergence.item(), epoch)
            self.train_writer.add_scalar('loss__log_likelihood term', total_log_likelihood.item(), epoch)
            if not self.hyperparams.classification:
                self.train_writer.add_scalar('errors__mae', mae, epoch)
                self.train_writer.add_scalar('errors__rmse', rmse, epoch)

        return loss, rmse, mae


    def predict(self, dataset, epoch=1, mean_y_train=0, std_y_train=1, visualize_errors=False):
        """
        Evaluates an ensemble of networks on a given test dataset.

        Because the Bayesian Neural Network has a distribution over the weights,
        we basically have an infinite number of different neural networks. We can
        take advantage of that by using an ensemble of networks during prediction.
        Each model in the ensemble performs a prediction. The different predictions
        are then averaged to give a final output.

        A model can be obtained by sampling weights from the distribution. Note: for
        each input batch, new models will be sampled.
        """
        n_samples_testing = self.hyperparams.n_samples_testing
        dataset_size = dataset.features.shape[0]
        test_batch_size = dataset_size

        # transform dataset to dataloader
        test_loader = dataset_to_dataloader(dataset, batch_size=test_batch_size, shuffle=False)
        n_test_samples = len(test_loader.dataset)
        n_test_batches = len(test_loader)

        self.eval()
        mse = 0
        rmse = 0
        mae = 0
        loglike = 0

        all_predicted_distributions = []
        means = []

        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(self.device), target.to(self.device)

                # Each batch is forwarded through each model in the ensemble
                # and the model outputs are saved.
                ensemble_outputs = self.forward(data, sample=True, n_samples=n_samples_testing)
                ensemble_outputs = ensemble_outputs * std_y_train + mean_y_train
                ensemble_outputs = ensemble_outputs.reshape(n_samples_testing, test_batch_size).t()

                # calculation of the predictive log likelihood of a batch, see notes from 18.12.18
                var_noise = np.exp(self.log_var_noise.detach().numpy()) * std_y_train ** 2

                if self.hyperparams.classification:
                    targets = target.reshape(-1, 1).repeat(1, n_samples_testing)
                    loglike_factor = -F.binary_cross_entropy_with_logits(ensemble_outputs, targets,
                                                                         reduction='none')
                    loglike += torch.sum(torch.logsumexp(loglike_factor - math.log(n_samples_testing), 1))
                    # Given all model outputs, compute the mean output of the ensemble
                    mean_output = ensemble_outputs.mean(1)
                else:
                    if self.device.type == 'cuda':
                        target = target.cpu().numpy()
                        ensemble_outputs = ensemble_outputs.cpu().numpy()
                    log_factor = (-0.5 * np.log(2 * math.pi * var_noise)
                                  - (np.tile(target.reshape(-1, 1), (1, n_samples_testing))
                                     - np.array(ensemble_outputs)) ** 2 / (2 * var_noise))
                    loglike += np.sum(logsumexp(log_factor - np.log(n_samples_testing), 1))
                    # Given all model outputs, compute the mean output of the ensemble
                    mean_output = ensemble_outputs.mean(1)
                    if self.device.type == 'cuda':
                        target = torch.from_numpy(target)
                        mean_output = torch.from_numpy(mean_output)

                if self.hyperparams.classification:
                    # Map the logits through the sigmoid to obtain Bernoulli probabilities
                    distributions = [BinarySampleDistribution(1 / (1 + np.exp(-e)))
                                     for e in ensemble_outputs.cpu().detach().numpy()]

                else:
                    mse += F.mse_loss(mean_output, target, reduction='sum')
                    mae += F.l1_loss(mean_output, target, reduction='sum')

                    distributions = [SampleDistribution(ensemble_outputs[i], var_noise)
                                     for i in range(test_batch_size)]

                all_predicted_distributions.extend(distributions)

        predicted_distr = PredictiveDistribution(all_predicted_distributions)

        loglike /= dataset_size
        rmse = np.sqrt(mse / dataset_size)
        mae /= dataset_size

        if self.hyperparams.classification:
            zero_one = AllMetrics.zero_one_loss.compute(target.cpu().detach().numpy(), predicted_distr)

        if visualize_errors:
            self.test_writer.add_scalar('errors__predictive log likelihood', loglike, epoch)
            if self.hyperparams.classification:
                self.test_writer.add_scalar('errors__zero_one', zero_one, epoch)
            else:
                self.test_writer.add_scalar('errors__mae', mae, epoch)
                self.test_writer.add_scalar('errors__rmse', rmse, epoch)

        return predicted_distr, rmse, mae, -loglike
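
For context, a training and evaluation loop around train_model and predict might look as follows. This is a sketch, not from the source: dataset, test_dataset, the normalization statistics mean_y/std_y, and the n_epochs field are assumptions.

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GaussianBNN(device, hyperparams)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(hyperparams.n_epochs):  # n_epochs is an assumed field
    loss, rmse, mae = model.train_model(dataset, epoch, optimizer, visualize_errors=True)

predicted_distr, rmse, mae, nll = model.predict(
    test_dataset, mean_y_train=mean_y, std_y_train=std_y, visualize_errors=True)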