def initialize(self, input_dim: int, hidden_dim: int,
               init_scale: float = 0.001, basis: coo_matrix = None,
               encoder_depth: int = 1,
               imputer: Callable[[torch.Tensor], torch.Tensor] = None,
               batch_size: int = 10, bias=True):
    self.hidden_dim = hidden_dim
    self.bias = bias
    # Psi must have dimension (D - 1) x D.
    if basis is None:
        tree = random_linkage(input_dim)
        basis = sparse_balance_basis(tree)[0].copy()
    indices = np.vstack((basis.row, basis.col))
    Psi = torch.sparse_coo_tensor(
        indices.copy(), basis.data.astype(np.float32).copy(),
        requires_grad=False)
    self.input_dim = Psi.shape[0]
    # Default imputer adds a pseudocount of 1 to avoid zeros.
    if imputer is None:
        self.imputer = lambda x: x + 1
    else:
        self.imputer = imputer
    if encoder_depth > 1:
        self.first_encoder = nn.Linear(
            self.input_dim, hidden_dim, bias=self.bias)
        num_encoder_layers = encoder_depth
        layers = [self.first_encoder]
        for layer_i in range(num_encoder_layers - 1):
            layers.append(nn.Softplus())
            layers.append(nn.Linear(hidden_dim, hidden_dim, bias=self.bias))
        self.encoder = nn.Sequential(*layers)
        # Initialize the encoder weights.
        for encoder_layer in self.encoder:
            if isinstance(encoder_layer, nn.Linear):
                encoder_layer.weight.data.normal_(0.0, init_scale)
    else:
        self.encoder = nn.Linear(self.input_dim, hidden_dim, bias=self.bias)
        self.encoder.weight.data.normal_(0.0, init_scale)
    self.decoder = nn.Linear(hidden_dim, self.input_dim, bias=False)
    self.variational_logvars = nn.Parameter(torch.zeros(hidden_dim))
    self.log_sigma_sq = nn.Parameter(torch.tensor(0.01))
    self.eta = nn.Parameter(torch.zeros(batch_size, self.input_dim))
    self.eta.data.normal_(0.0, init_scale)
    self.decoder.weight.data.normal_(0.0, init_scale)
    zI = torch.ones(self.hidden_dim).to(self.eta.device)
    zm = torch.zeros(self.hidden_dim).to(self.eta.device)
    self.register_buffer('Psi', Psi)
    self.register_buffer('zI', zI)
    self.register_buffer('zm', zm)
def get_basis(input_dim, basis=None):
    if basis is None:
        tree = random_linkage(input_dim)
        basis = sparse_balance_basis(tree)[0].copy()
    indices = np.vstack((basis.row, basis.col))
    Psi = torch.sparse_coo_tensor(
        indices.copy(), basis.data.astype(np.float32).copy(),
        requires_grad=False).coalesce()
    return Psi
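# Illustrative usage sketch (not part of the original module): builds a random
# ILR basis for D = 10 taxa and checks its (D - 1) x D shape. Assumes the same
# names the functions above rely on (numpy as np, torch, random_linkage,
# sparse_balance_basis) are importable in this namespace.
def _example_get_basis():
    Psi = get_basis(input_dim=10)
    # Psi is a sparse (D - 1) x D tensor, here 9 x 10.
    assert tuple(Psi.shape) == (9, 10)
    return Psi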
def __init__(self, input_dim, hidden_dim, init_scale=0.001,
             use_analytic_elbo=True, encoder_depth=1,
             likelihood='gaussian', basis=None, bias=False):
    super(LinearVAE, self).__init__()
    self.bias = bias
    self.hidden_dim = hidden_dim
    self.likelihood = likelihood
    self.use_analytic_elbo = use_analytic_elbo
    if basis is None:
        tree = random_linkage(input_dim)
        basis = sparse_balance_basis(tree)[0].copy()
    indices = np.vstack((basis.row, basis.col))
    Psi = torch.sparse_coo_tensor(
        indices.copy(), basis.data.astype(np.float32).copy(),
        requires_grad=False)
    self.input_dim = Psi.shape[0]
    self.register_buffer('Psi', Psi)
    if encoder_depth > 1:
        self.first_encoder = nn.Linear(
            self.input_dim, hidden_dim, bias=self.bias)
        num_encoder_layers = encoder_depth
        layers = [self.first_encoder]
        for layer_i in range(num_encoder_layers - 1):
            layers.append(nn.Softplus())
            layers.append(nn.Linear(hidden_dim, hidden_dim, bias=self.bias))
        self.encoder = nn.Sequential(*layers)
        # Initialize the encoder weights.
        for encoder_layer in self.encoder:
            if isinstance(encoder_layer, nn.Linear):
                encoder_layer.weight.data.normal_(0.0, init_scale)
    else:
        self.encoder = nn.Linear(self.input_dim, hidden_dim, bias=self.bias)
        self.encoder.weight.data.normal_(0.0, init_scale)
    self.decoder = nn.Linear(hidden_dim, self.input_dim, bias=self.bias)
    self.imputer = lambda x: x + 1
    self.variational_logvars = nn.Parameter(torch.zeros(hidden_dim))
    self.log_sigma_sq = nn.Parameter(torch.tensor(0.0))
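# Illustrative usage sketch (not part of the original module): constructs a
# LinearVAE for D = 50 taxa with a 5-dimensional latent space. The ILR basis
# is generated internally from a random tree, so self.input_dim becomes
# D - 1 = 49. Argument values here are arbitrary examples.
def _example_linear_vae():
    vae = LinearVAE(input_dim=50, hidden_dim=5, encoder_depth=2)
    assert vae.input_dim == 49
    assert vae.variational_logvars.shape == (5,)
    return vae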
def multinomial_bioms(k, D, N, M, min_sv=0.11, max_sv=5.0, sigma_sq=0.1):
    """ Simulates biom tables from a multinomial distribution.

    Parameters
    ----------
    k : int
        Number of latent dimensions.
    D : int
        Number of microbes.
    N : int
        Number of samples.
    M : int
        Average sequencing depth.
    min_sv : float
        Smallest eigenvalue used to construct the loading matrix W.
    max_sv : float
        Largest eigenvalue used to construct the loading matrix W.
    sigma_sq : float
        Variance of the Gaussian noise added to the ILR coordinates eta.

    Returns
    -------
    dict of np.array
        Ground truth parameters.
    """
    dims, hdims, total = D, k, N
    eigs = min_sv + (max_sv - min_sv) * np.linspace(0, 1, hdims)
    eigvectors = ortho_group.rvs(dims - 1)[:, :hdims]
    W = np.matmul(eigvectors, np.diag(np.sqrt(eigs - sigma_sq)))
    sigma = np.sqrt(sigma_sq)
    # Sample latent coordinates and noisy ILR coordinates.
    z = np.random.normal(size=(total, hdims))
    eta = np.random.normal(np.matmul(z, W.T), sigma).astype(np.float32)
    # Create the ILR basis and convert latent variables to observed counts.
    tree = random_linkage(D)
    Psi = _balance_basis(tree)[0]
    prob = closure(np.exp(eta @ Psi))
    depths = np.random.poisson(M, size=N)
    Y = np.vstack([np.random.multinomial(depths[i], prob[i])
                   for i in range(N)])
    return dict(sigma=sigma, W=W, Psi=Psi, tree=tree, eta=eta, z=z, Y=Y,
                depths=depths, eigs=eigs, eigvectors=eigvectors)
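# Illustrative usage sketch (not part of the original module): simulates a
# small count table with k = 3 latent dimensions, D = 20 taxa, N = 100 samples
# and an average sequencing depth of M = 5000 reads, then inspects the shapes
# of the returned ground-truth parameters.
def _example_multinomial_bioms():
    sim = multinomial_bioms(k=3, D=20, N=100, M=5000)
    assert sim['Y'].shape == (100, 20)   # counts: samples x taxa
    assert sim['W'].shape == (19, 3)     # loadings in ILR space: (D - 1) x k
    assert sim['z'].shape == (100, 3)    # latent coordinates: samples x k
    return sim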
def multinomial_batch_bioms(k, D, N, M, C=2, min_sv=0.11, max_sv=5.0,
                            sigma_sq=0.1):
    """ Simulates biom tables from a multinomial distribution with batch effects.

    Parameters
    ----------
    k : int
        Number of latent dimensions.
    D : int
        Number of microbes.
    N : int
        Number of samples.
    M : int
        Average sequencing depth.
    C : int
        Number of batches.
    min_sv : float
        Smallest eigenvalue used to construct the loading matrix W.
    max_sv : float
        Largest eigenvalue used to construct the loading matrix W.
    sigma_sq : float
        Variance of the Gaussian noise added to the ILR coordinates eta.

    Returns
    -------
    dict of np.array
        Ground truth parameters.
    """
    dims, hdims, total = D, k, N
    eigs = min_sv + (max_sv - min_sv) * np.linspace(0, 1, hdims)
    eigvectors = ortho_group.rvs(dims - 1)[:, :hdims]
    W = np.matmul(eigvectors, np.diag(np.sqrt(eigs - sigma_sq)))
    sigma = np.sqrt(sigma_sq)
    z = np.random.normal(size=(total, hdims))
    eta = np.random.normal(np.matmul(z, W.T), sigma).astype(np.float32)
    # Create ILR basis
    tree = random_linkage(D)
    Psi = _balance_basis(tree)[0]
    # Add batch effects; the variances must always be positive.
    alpha = np.abs(np.random.normal(0, 0.5, size=(D)))
    alphaILR = np.abs(Psi) @ alpha
    m = np.zeros(D - 1)
    B = np.random.multivariate_normal(m, np.diag(alphaILR), size=C)
    batch_idx = np.random.randint(C, size=N)
    eta = np.vstack([eta[i] + B[batch_idx[i]] for i in range(N)])
    # Convert latent variables to observed counts
    prob = closure(np.exp(eta @ Psi))
    depths = np.random.poisson(M, size=N)
    Y = np.vstack([np.random.multinomial(depths[i], prob[i])
                   for i in range(N)])
    return dict(sigma=sigma, W=W, Psi=Psi, tree=tree, eta=eta, z=z, Y=Y,
                alpha=alpha, alphaILR=alphaILR, B=B, batch_idx=batch_idx,
                depths=depths, eigs=eigs, eigvectors=eigvectors)
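# Illustrative usage sketch (not part of the original module): simulates counts
# with C = 3 batches and checks that every sample gets a batch assignment and
# that the batch-effect matrix B has one row per batch in the
# (D - 1)-dimensional ILR space. Argument values here are arbitrary examples.
def _example_multinomial_batch_bioms():
    sim = multinomial_batch_bioms(k=3, D=20, N=100, M=5000, C=3)
    assert sim['Y'].shape == (100, 20)       # counts: samples x taxa
    assert sim['B'].shape == (3, 19)         # batch effects: C x (D - 1)
    assert sim['batch_idx'].shape == (100,)  # batch assignment per sample
    return sim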