def __init__(self, x, layers, num_components=100, device=None, old=False):
    super(VAE_bodies, self).__init__()
    self.device = device
    self.p = int(layers[0])   # Dimension of x
    self.d = int(layers[-1])  # Dimension of z
    self.h = layers           # Layer dimensions (input through latent)
    self.num_components = num_components

    # Encoder: residual blocks down to the latent dimension, then a final
    # linear layer producing mean and log-variance (2 * d outputs).
    enc = []
    for k in range(len(layers) - 1):
        in_features = int(layers[k])
        out_features = int(layers[k + 1])
        enc.append(nnj.ResidualBlock(nnj.Linear(in_features, out_features),
                                     nnj.Softplus()))
    enc.append(nnj.Linear(out_features, int(self.d * 2)))

    # Decoder mean: mirror of the encoder, with a Sigmoid on the output layer.
    dec = []
    for k in reversed(range(len(layers) - 1)):
        in_features = int(layers[k + 1])
        out_features = int(layers[k])
        if not old:  # temporary branch to load old models; TODO: delete
            if out_features != layers[0]:
                dec.append(nnj.ResidualBlock(
                    nnj.Linear(in_features, out_features), nnj.Softplus()))
            else:
                dec.append(nnj.ResidualBlock(
                    nnj.Linear(in_features, out_features), nnj.Sigmoid()))
        else:
            dec.append(nnj.ResidualBlock(
                nnj.Linear(in_features, out_features), nnj.Softplus()))
            if out_features == layers[0]:
                dec.append(nnj.Sigmoid())

    # Note how we use 'nnj' instead of 'nn' -- this gives automatic
    # computation of Jacobians of the implemented neural network.
    # The embed function is required to also return Jacobians if
    # requested; by using 'nnj' this becomes a trivial constraint.
    self.encoder = nnj.Sequential(*enc)
    self.decoder_loc = nnj.Sequential(*dec)
    self.init_decoder_scale = 0.01 * torch.ones(self.p, device=self.device)

    self.prior_loc = torch.zeros(self.d, device=self.device)
    self.prior_scale = torch.ones(self.d, device=self.device)
    self.prior = td.Independent(
        td.Normal(loc=self.prior_loc, scale=self.prior_scale), 1)

    # Create a blank std-network. It is important to call init_std after
    # training the mean, but before training the std.
    self.dec_std = None

    self.to(self.device)
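# A minimal construction sketch (illustrative, not from the original code).
# `layers` runs from the data dimension down to the latent dimension; the
# 784/256/2 sizes and the random `x` below are hypothetical:
#
#     x = torch.rand(1000, 784)
#     model = VAE_bodies(x, layers=[784, 256, 2], num_components=100,
#                        device=torch.device('cpu'))
#     print(model.p, model.d)  # 784 2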
def init_std(self, x, gmm_mu=None, gmm_cv=None, weights=None,
             beta_constant=0.5, beta_override=None, inv_maxstd=7.5e-2,
             n_samples=2, num_components=None):
    self.beta_constant = beta_constant
    if num_components is not None:
        self.num_components = num_components

    N, D = x.shape
    with torch.no_grad():
        # Sample latent codes from the trained encoder.
        z = self.encode(x.to(self.device)).sample([n_samples]).reshape(
            n_samples * N, self.d)
    d = z.shape[1]

    if gmm_mu is None and gmm_cv is None and weights is None:
        # Fit a spherical GMM in latent space to obtain the RBF centers.
        from sklearn import mixture
        clf = mixture.GaussianMixture(n_components=self.num_components,
                                      covariance_type='spherical')
        clf.fit(z.cpu().numpy())
        self.gmm_means = clf.means_
        self.gmm_covariances = clf.covariances_
        self.clf_weights = clf.weights_
    else:
        print('loading weights...')
        self.gmm_means = gmm_mu
        self.gmm_covariances = gmm_cv
        self.clf_weights = weights

    if beta_override is None:
        self.beta = beta_constant / torch.tensor(
            self.gmm_covariances, dtype=torch.float, requires_grad=False)
    else:
        self.beta = beta_override
    self.beta = self.beta.to(self.device)

    self.dec_std = nnj.Sequential(
        nnj.RBF(d, self.num_components,
                points=torch.tensor(self.gmm_means, dtype=torch.float,
                                    requires_grad=False),
                beta=self.beta),                            # d --> num_components
        nnj.PosLinear(self.num_components, 1, bias=False),  # num_components --> 1
        nnj.Reciprocal(inv_maxstd),                         # 1 --> 1
        nnj.PosLinear(1, D)).to(self.device)                # 1 --> D

    with torch.no_grad():
        # Initialise the mixing weights as the inverse softplus of the
        # GMM component weights.
        self.dec_std[1].weight[:] = (
            (torch.tensor(self.clf_weights, dtype=torch.float).exp()
             - 1.0).log()).to(self.device)
    return self.dec_std
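# Usage sketch for the two-phase training scheme (illustrative; the ELBO
# training loop is the project's own and is not shown here). The GMM is
# fitted to latent codes of the *trained* encoder, so init_std must run
# after mean training and before any std fine-tuning:
#
#     # phase 1: train model.encoder / model.decoder_loc (dec_std is None)
#     # phase 2: build and initialise the RBF std-network
#     model.init_std(x, beta_constant=0.5, inv_maxstd=7.5e-2, n_samples=2)
#     # phase 3 (optional): fine-tune the std-network parameters only
#     opt = torch.optim.Adam(model.dec_std.parameters(), lr=1e-3)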
def init_std_naive(self):
    # Plain feed-forward std-network: latent_space --> hidden layers
    # (reversed) --> 784.
    dec = [nnj.Linear(self.latent_space, self.hidden_layer[-1]),
           nnj.Softplus()]
    for i in reversed(range(1, len(self.hidden_layer))):
        dec.append(nnj.ResidualBlock(
            nnj.Linear(self.hidden_layer[i], self.hidden_layer[i - 1]),
            nnj.Softplus()))
    dec.append(nnj.Linear(self.hidden_layer[0], 784))
    self.decoder_std = nnj.Sequential(*dec).to(self.device)
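# Alternative to the RBF construction: a plain std-network trained by
# gradient descent. Sketch (illustrative):
#
#     vae.init_std_naive()
#     opt = torch.optim.Adam(vae.decoder_std.parameters(), lr=1e-3)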
def __init__(self, hidden_layer=[512, 256], latent_space=2):
    super(BasicVAE, self).__init__()
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.hidden_layer = hidden_layer
    self.latent_space = latent_space

    self.prior_loc = torch.zeros(latent_space, device=self.device)
    self.prior_scale = torch.ones(latent_space, device=self.device)
    self.prior = td.Independent(
        td.Normal(loc=self.prior_loc, scale=self.prior_scale), 1)

    # Encoder: 784 --> hidden layers --> 2 * latent_space
    # (mean and log-variance concatenated).
    enc = [nnj.ResidualBlock(nnj.Linear(784, hidden_layer[0]),
                             nnj.Softplus())]
    for i in range(len(hidden_layer) - 1):
        enc.append(nnj.ResidualBlock(
            nnj.Linear(hidden_layer[i], hidden_layer[i + 1]),
            nnj.Softplus()))
    enc.append(nnj.Linear(hidden_layer[-1], int(latent_space * 2)))
    self.encoder = nnj.Sequential(*enc)

    # Decoder mean: latent_space --> hidden layers (reversed) --> 784.
    dec = [nnj.ResidualBlock(nnj.Linear(latent_space, hidden_layer[0]),
                             nnj.Softplus())]
    for i in reversed(range(1, len(hidden_layer))):
        dec.append(nnj.ResidualBlock(
            nnj.Linear(hidden_layer[i], hidden_layer[i - 1]),
            nnj.Softplus()))
    dec.append(nnj.ResidualBlock(nnj.Linear(hidden_layer[0], 784),
                                 nnj.Sigmoid()))
    self.decoder_loc = nnj.Sequential(*dec)

    self.init_decoder_scale = 0.01 * torch.ones(784, device=self.device)

    # Blank std-network; call init_std after training the mean.
    self.decoder_std = None
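# Construction sketch (illustrative): the encoder outputs mean and
# log-variance concatenated, so its output splits into two chunks of size
# latent_space. The random batch below is hypothetical:
#
#     vae = BasicVAE(hidden_layer=[512, 256], latent_space=2)
#     x = torch.rand(16, 784)
#     mu, lv = torch.chunk(vae.encoder(x.to(vae.device)), chunks=2, dim=-1)
#     x_rec = vae.decoder_loc(mu)  # shape (16, 784)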
def load_std(self, x, gmm_mu=None, gmm_cv=None, weights=None,
             inv_maxstd=1e-1, beta_constant=0.5, beta_values=None,
             z_override=None, sigma=None):
    """Rebuild the std-network from previously fitted GMM parameters.

    Messy; needs a clean separation between init and load.
    """
    # z_override and sigma are kept for signature parity; unused here.
    N, D = x.shape
    print('loading weights...')
    self.gmm_means = gmm_mu
    self.gmm_covariances = gmm_cv
    self.clf_weights = weights
    d = self.gmm_means.shape[1]

    if beta_values is None:
        if torch.is_tensor(beta_constant):
            beta_constant = beta_constant.cpu()
        beta = beta_constant / torch.tensor(
            self.gmm_covariances, dtype=torch.float, requires_grad=False)
    else:
        beta = beta_values
    self.beta = beta.to(self.device)

    self.dec_std = nnj.Sequential(
        nnj.RBF(d, self.num_components,
                points=torch.tensor(self.gmm_means, dtype=torch.float,
                                    requires_grad=False),
                beta=self.beta),                            # d --> num_components
        nnj.PosLinear(self.num_components, 1, bias=False),  # num_components --> 1
        nnj.Reciprocal(inv_maxstd),                         # 1 --> 1
        nnj.PosLinear(1, D)).to(self.device)                # 1 --> D
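# Round-trip sketch (illustrative; the file name is hypothetical): persist
# the GMM parameters produced by init_std, then rebuild the std-network
# without refitting:
#
#     torch.save({'mu': vae.gmm_means, 'cv': vae.gmm_covariances,
#                 'w': vae.clf_weights, 'beta': vae.beta}, 'gmm.pt')
#     ckpt = torch.load('gmm.pt')
#     vae.load_std(x, gmm_mu=ckpt['mu'], gmm_cv=ckpt['cv'],
#                  weights=ckpt['w'], beta_values=ckpt['beta'])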
def init_std(self, x, gmm_mu=None, gmm_cv=None, weights=None,
             inv_maxstd=1e-1, beta_constant=0.5, component_overwrite=None,
             beta_override=None, n_samples=2, z_override=None, sigma=None):
    if component_overwrite is not None:
        self.num_components = component_overwrite

    if z_override is None:
        # Sample latent codes from the trained encoder posterior.
        with torch.no_grad():
            mu, lv = torch.chunk(self.encoder(x.to(self.device)),
                                 chunks=2, dim=-1)
            z = td.Normal(loc=mu,
                          scale=lv.mul(0.5).exp() + 1e-10).sample([n_samples])
            z = z.reshape(int(x.shape[0] * n_samples), z.shape[-1])
    else:
        z = z_override

    N, D = x.shape
    d = z.shape[1]

    if gmm_mu is None and gmm_cv is None and weights is None:
        # Fit a spherical GMM in latent space to obtain the RBF centers.
        from sklearn import mixture
        clf = mixture.GaussianMixture(n_components=self.num_components,
                                      covariance_type='spherical')
        clf.fit(z.cpu().numpy())
        self.gmm_means = clf.means_
        self.gmm_covariances = clf.covariances_
        self.clf_weights = clf.weights_
    else:
        print('loading weights...')
        self.gmm_means = gmm_mu
        self.gmm_covariances = gmm_cv
        self.clf_weights = weights

    if beta_override is None:
        if torch.is_tensor(beta_constant):
            beta_constant = beta_constant.cpu()
        beta = beta_constant / torch.tensor(
            self.gmm_covariances, dtype=torch.float, requires_grad=False)
    else:
        beta = beta_override
    self.beta = beta.to(self.device)

    self.dec_std = nnj.Sequential(
        nnj.RBF(d, self.num_components,
                points=torch.tensor(self.gmm_means, dtype=torch.float,
                                    requires_grad=False),
                beta=self.beta),                            # d --> num_components
        nnj.PosLinear(self.num_components, 1, bias=False),  # num_components --> 1
        nnj.Reciprocal(inv_maxstd),                         # 1 --> 1
        nnj.PosLinear(1, D)).to(self.device)                # 1 --> D

    if sigma is not None:
        # Optionally swap the RBF head for a variant with a fixed box width.
        self.dec_std[0] = nnj.RBF_variant(
            d, self.gmm_means.shape[0],
            points=torch.tensor(self.gmm_means, dtype=torch.float,
                                requires_grad=False),
            beta=self.beta.requires_grad_(False),
            boxwidth=sigma).to(self.device)

    with torch.no_grad():
        # Initialise the mixing weights as the inverse softplus of the
        # GMM component weights.
        self.dec_std[1].weight[:] = (
            (torch.tensor(self.clf_weights, dtype=torch.float).exp()
             - 1.0).log()).to(self.device)
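# Fitting sketch (illustrative): after mean training, fit the GMM-based
# std-network; passing `sigma` swaps in the RBF_variant head. Given latent
# codes `mu` from the encoder (as in the BasicVAE sketch above):
#
#     vae.init_std(x, beta_constant=0.5, n_samples=2)
#     std = vae.dec_std(mu)  # per-dimension std, shape (batch, 784)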