def __init__(self,
                 in_features,
                 out_features,
                 bias=True,
                 prior_sigma_1=0.1,
                 prior_sigma_2=0.4,
                 prior_pi=1,
                 posterior_mu_init=0,
                 posterior_rho_init=-6.0,
                 freeze=False,
                 prior_dist=None):
        """Initialize variational posteriors and scale-mixture priors for a
        Bayesian linear layer.

        Args:
            in_features: input dimensionality of the layer.
            out_features: output dimensionality of the layer.
            bias: if True, create variational bias parameters and a bias prior.
            prior_sigma_1: sigma of the first prior mixture component.
            prior_sigma_2: sigma of the second prior mixture component.
            prior_pi: mixture proportion of the scale-mixture prior.
            posterior_mu_init: mean of the normal init for the posterior mu.
            posterior_rho_init: mean of the normal init for the posterior rho.
            freeze: stored on the instance; presumably disables sampling in
                forward() — confirm against the forward implementation.
            prior_dist: optional custom prior distribution forwarded to
                ScaleMixturePrior.
        """
        super().__init__()

        # our main parameters
        self.in_features = in_features
        self.out_features = out_features
        self.bias = bias
        self.freeze = freeze

        self.posterior_mu_init = posterior_mu_init
        self.posterior_rho_init = posterior_rho_init

        # parameters for the scale mixture prior
        self.prior_sigma_1 = prior_sigma_1
        self.prior_sigma_2 = prior_sigma_2
        self.prior_pi = prior_pi
        self.prior_dist = prior_dist

        # Variational weight parameters and sample
        self.weight_mu = nn.Parameter(
            torch.Tensor(out_features,
                         in_features).normal_(posterior_mu_init, 0.1))
        self.weight_rho = nn.Parameter(
            torch.Tensor(out_features,
                         in_features).normal_(posterior_rho_init, 0.1))
        self.weight_sampler = GaussianVariational(self.weight_mu,
                                                  self.weight_rho)

        # Variational bias parameters and sample
        if self.bias:
            self.bias_mu = nn.Parameter(
                torch.Tensor(out_features).normal_(posterior_mu_init, 0.1))
            self.bias_rho = nn.Parameter(
                torch.Tensor(out_features).normal_(posterior_rho_init, 0.1))
            self.bias_sampler = GaussianVariational(self.bias_mu,
                                                    self.bias_rho)
        else:
            # FIX: previously these attributes were simply never defined when
            # bias=False, so any later access raised AttributeError.  Define
            # them as None so callers can test for them safely.
            self.bias_mu = None
            self.bias_rho = None
            self.bias_sampler = None

        # Priors (as BBP paper)
        self.weight_prior_dist = ScaleMixturePrior(self.prior_pi,
                                                   self.prior_sigma_1,
                                                   self.prior_sigma_2,
                                                   dist=self.prior_dist)
        if self.bias:
            self.bias_prior_dist = ScaleMixturePrior(self.prior_pi,
                                                     self.prior_sigma_1,
                                                     self.prior_sigma_2,
                                                     dist=self.prior_dist)
        else:
            # Same fix as above: explicit None instead of a missing attribute.
            self.bias_prior_dist = None

        # Running KL-term accumulators, updated elsewhere.
        self.log_prior = 0
        self.log_variational_posterior = 0
# Example no. 2 — separator artifact from the code aggregator (stray "0" line removed)
    def __init__(self,
                 in_features,
                 out_features,
                 bias=True,
                 prior_sigma_1=1,
                 prior_sigma_2=0.002,
                 prior_pi=0.5,
                 freeze=False):
        """Build variational (mu, rho) parameter pairs, their Gaussian
        samplers and scale-mixture priors for a Bayesian LSTM-style layer.

        All gate weights are stacked along the last axis (factor 4).
        """
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.use_bias = bias
        self.freeze = freeze

        # Scale-mixture prior hyperparameters.
        self.prior_sigma_1 = prior_sigma_1
        self.prior_sigma_2 = prior_sigma_2
        self.prior_pi = prior_pi

        def _variational(*shape):
            # One (mu, rho) pair plus its sampler; mu ~ U(-0.2, 0.2),
            # rho ~ U(-5, -4), drawn in that order.
            mu = nn.Parameter(torch.Tensor(*shape).uniform_(-0.2, 0.2))
            rho = nn.Parameter(torch.Tensor(*shape).uniform_(-5, -4))
            return mu, rho, GaussianVariational(mu, rho)

        # Input-to-hidden weights.
        (self.weight_ih_mu,
         self.weight_ih_rho,
         self.weight_ih_sampler) = _variational(in_features, out_features * 4)
        self.weight_ih = None

        # Hidden-to-hidden weights.
        (self.weight_hh_mu,
         self.weight_hh_rho,
         self.weight_hh_sampler) = _variational(out_features, out_features * 4)
        self.weight_hh = None

        # Bias (created unconditionally; `use_bias` is consulted elsewhere).
        (self.bias_mu,
         self.bias_rho,
         self.bias_sampler) = _variational(out_features * 4)
        self.bias = None

        # One scale-mixture prior per sampled tensor.
        mixture = (self.prior_pi, self.prior_sigma_1, self.prior_sigma_2)
        self.weight_ih_prior_dist = ScaleMixturePrior(*mixture)
        self.weight_hh_prior_dist = ScaleMixturePrior(*mixture)
        self.bias_prior_dist = ScaleMixturePrior(*mixture)

        # Running KL-term accumulators, updated elsewhere.
        self.log_prior = 0
        self.log_variational_posterior = 0
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 groups=1,
                 stride=1,
                 padding=0,
                 dilation=1,
                 bias=True,
                 prior_sigma_1=1,
                 prior_sigma_2=0.002,
                 prior_pi=0.5,
                 freeze=False):
        """Initialize variational weight/bias posteriors and scale-mixture
        priors for a Bayesian convolution layer."""
        super().__init__()

        # Convolution configuration, stored verbatim for use in forward().
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.freeze = freeze
        self.kernel_size = kernel_size
        self.groups = groups
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.bias = bias

        # Scale-mixture prior hyperparameters.
        self.prior_sigma_1 = prior_sigma_1
        self.prior_sigma_2 = prior_sigma_2
        self.prior_pi = prior_pi

        # Variational weights: a U(-1, 1)-initialised tensor each for mu and
        # rho, shaped like a grouped conv kernel.
        # NOTE(review): kernel_size is unpacked, so it must be an iterable —
        # a bare int would fail here.
        weight_shape = (out_channels, in_channels // groups, *kernel_size)
        self.weight_mu = nn.Parameter(
            torch.Tensor(*weight_shape).uniform_(-1, 1))
        self.weight_rho = nn.Parameter(
            torch.Tensor(*weight_shape).uniform_(-1, 1))
        self.weight_sampler = GaussianVariational(self.weight_mu,
                                                  self.weight_rho)

        # Variational bias posterior (created regardless of `bias`).
        self.bias_mu = nn.Parameter(
            torch.Tensor(out_channels).uniform_(-1, 1))
        self.bias_rho = nn.Parameter(
            torch.Tensor(out_channels).uniform_(-1, 1))
        self.bias_sampler = GaussianVariational(self.bias_mu, self.bias_rho)

        # Priors (scale mixture, as in the Bayes-by-Backprop paper).
        mixture = (self.prior_pi, self.prior_sigma_1, self.prior_sigma_2)
        self.weight_prior_dist = ScaleMixturePrior(*mixture)
        self.bias_prior_dist = ScaleMixturePrior(*mixture)

        # Running KL-term accumulators, updated elsewhere.
        self.log_prior = 0
        self.log_variational_posterior = 0
    def __init__(self,
                 in_features,
                 out_features,
                 bias=True,
                 prior_sigma_1=1,
                 prior_sigma_2=0.002,
                 prior_pi=0.5,
                 freeze=False):
        """Set up variational weight/bias posteriors and scale-mixture priors
        for a Bayesian linear layer (uniform initialisation variant)."""
        super().__init__()

        # Layer configuration.
        self.in_features = in_features
        self.out_features = out_features
        self.bias = bias
        self.freeze = freeze

        # Scale-mixture prior hyperparameters.
        self.prior_sigma_1 = prior_sigma_1
        self.prior_sigma_2 = prior_sigma_2
        self.prior_pi = prior_pi

        # Variational weight posterior: mu ~ U(-0.2, 0.2), rho ~ U(-5, -4).
        self.weight_mu = nn.Parameter(
            torch.Tensor(out_features, in_features).uniform_(-0.2, 0.2))
        self.weight_rho = nn.Parameter(
            torch.Tensor(out_features, in_features).uniform_(-5, -4))
        self.weight_sampler = GaussianVariational(self.weight_mu,
                                                  self.weight_rho)

        # Variational bias posterior (created regardless of `bias`).
        self.bias_mu = nn.Parameter(
            torch.Tensor(out_features).uniform_(-0.2, 0.2))
        self.bias_rho = nn.Parameter(
            torch.Tensor(out_features).uniform_(-5, -4))
        self.bias_sampler = GaussianVariational(self.bias_mu, self.bias_rho)

        # Priors (scale mixture, as in the Bayes-by-Backprop paper).
        mixture = (self.prior_pi, self.prior_sigma_1, self.prior_sigma_2)
        self.weight_prior_dist = ScaleMixturePrior(*mixture)
        self.bias_prior_dist = ScaleMixturePrior(*mixture)

        # Running KL-term accumulators, updated elsewhere.
        self.log_prior = 0
        self.log_variational_posterior = 0
    def __init__(self,
                 num_embeddings,
                 embedding_dim,
                 padding_idx=None,
                 max_norm=None,
                 norm_type=2.,
                 scale_grad_by_freq=False,
                 sparse=False,
                 prior_sigma_1=0.1,
                 prior_sigma_2=0.002,
                 prior_pi=1,
                 posterior_mu_init=0,
                 posterior_rho_init=-6.0,
                 freeze=False,
                 prior_dist=None):
        """Initialize a variational embedding table: a (mu, rho) posterior
        over the weight matrix plus a scale-mixture prior.

        The embedding-lookup options (padding_idx, max_norm, norm_type,
        scale_grad_by_freq, sparse) are stored verbatim — presumably forwarded
        to the functional embedding call elsewhere.
        """
        super().__init__()

        self.freeze = freeze

        # Prior / posterior hyperparameters.
        self.prior_sigma_1 = prior_sigma_1
        self.prior_sigma_2 = prior_sigma_2
        self.posterior_mu_init = posterior_mu_init
        self.posterior_rho_init = posterior_rho_init
        self.prior_pi = prior_pi
        self.prior_dist = prior_dist

        # Embedding-lookup configuration.
        self.num_embeddings = num_embeddings
        self.embedding_dim = embedding_dim
        self.padding_idx = padding_idx
        self.max_norm = max_norm
        self.norm_type = norm_type
        self.scale_grad_by_freq = scale_grad_by_freq
        self.sparse = sparse

        # Variational weight posterior: mu and rho drawn from
        # N(posterior_*_init, 0.1), mu first.
        table_shape = (num_embeddings, embedding_dim)
        self.weight_mu = nn.Parameter(
            torch.Tensor(*table_shape).normal_(posterior_mu_init, 0.1))
        self.weight_rho = nn.Parameter(
            torch.Tensor(*table_shape).normal_(posterior_rho_init, 0.1))
        self.weight_sampler = GaussianVariational(self.weight_mu,
                                                  self.weight_rho)

        # Prior (scale mixture, as in the Bayes-by-Backprop paper).
        self.weight_prior_dist = ScaleMixturePrior(self.prior_pi,
                                                   self.prior_sigma_1,
                                                   self.prior_sigma_2,
                                                   dist=self.prior_dist)

        # Running KL-term accumulators, updated elsewhere.
        self.log_prior = 0
        self.log_variational_posterior = 0
    def test_scale_mixture_any_prior(self):
        """A ScaleMixturePrior wrapping an arbitrary torch distribution
        (here StudentT) yields a finite, non-positive log-prior for a
        sampled weight tensor."""
        mu = torch.Tensor(10, 10).uniform_(-1, 1)
        rho = torch.Tensor(10, 10).uniform_(-1, 1)

        dist = GaussianVariational(mu, rho)
        sample = dist.sample()
        log_posterior = dist.log_posterior()

        prior_dist = ScaleMixturePrior(
            dist=torch.distributions.studentT.StudentT(1, 1))
        log_prior = prior_dist.log_prior(sample)

        # FIX: the original asserted `log_prior == log_prior` (a NaN check)
        # via assertEqual against torch.tensor(True); say it directly.
        self.assertFalse(torch.isnan(log_prior).item())
        # Equivalent to the original `log_posterior <= log_posterior -
        # log_prior`, i.e. the prior log-likelihood is non-positive.
        self.assertTrue((log_posterior <= log_posterior - log_prior).item())
# Example no. 7 — separator artifact from the code aggregator (stray "0" line removed)
    def __init__(self,
                 in_features,
                 out_features,
                 bias=True,
                 prior_sigma_1=0.1,
                 prior_sigma_2=0.002,
                 prior_pi=1,
                 posterior_mu_init=0,
                 posterior_rho_init=-6.0,
                 freeze=False,
                 prior_dist=None):
        """Build variational (mu, rho) parameter pairs, Gaussian samplers and
        scale-mixture priors for a Bayesian LSTM-style layer (normal-init
        variant with an optional custom prior distribution).

        All gate weights are stacked along the last axis (factor 4).
        """
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.use_bias = bias
        self.freeze = freeze

        # Posterior initialisation hyperparameters.
        self.posterior_mu_init = posterior_mu_init
        self.posterior_rho_init = posterior_rho_init

        # Scale-mixture prior hyperparameters.
        self.prior_sigma_1 = prior_sigma_1
        self.prior_sigma_2 = prior_sigma_2
        self.prior_pi = prior_pi
        self.prior_dist = prior_dist

        def _variational(*shape):
            # One (mu, rho) pair plus its sampler; both drawn from
            # N(posterior_*_init, 0.1), mu first.
            mu = nn.Parameter(
                torch.Tensor(*shape).normal_(posterior_mu_init, 0.1))
            rho = nn.Parameter(
                torch.Tensor(*shape).normal_(posterior_rho_init, 0.1))
            return mu, rho, GaussianVariational(mu, rho)

        def _prior():
            # One scale-mixture prior per sampled tensor, sharing the
            # optional custom distribution.
            return ScaleMixturePrior(self.prior_pi,
                                     self.prior_sigma_1,
                                     self.prior_sigma_2,
                                     dist=self.prior_dist)

        # Input-to-hidden weights.
        (self.weight_ih_mu,
         self.weight_ih_rho,
         self.weight_ih_sampler) = _variational(in_features, out_features * 4)
        self.weight_ih = None

        # Hidden-to-hidden weights.
        (self.weight_hh_mu,
         self.weight_hh_rho,
         self.weight_hh_sampler) = _variational(out_features, out_features * 4)
        self.weight_hh = None

        # Bias (created unconditionally; `use_bias` is consulted elsewhere).
        (self.bias_mu,
         self.bias_rho,
         self.bias_sampler) = _variational(out_features * 4)
        self.bias = None

        self.weight_ih_prior_dist = _prior()
        self.weight_hh_prior_dist = _prior()
        self.bias_prior_dist = _prior()

        # Running KL-term accumulators, updated elsewhere.
        self.log_prior = 0
        self.log_variational_posterior = 0