Example #1
    def initialize(self,
                   input_dim: int,
                   hidden_dim: int,
                   init_scale: float = 0.001,
                   basis: coo_matrix = None,
                   encoder_depth: int = 1,
                   imputer: Callable[[torch.Tensor], torch.Tensor] = None,
                   batch_size: int = 10,
                   bias=True):
        self.hidden_dim = hidden_dim
        self.bias = bias
        # Psi must be dimension D - 1 x D
        if basis is None:
            tree = random_linkage(input_dim)
            basis = sparse_balance_basis(tree)[0].copy()
        indices = np.vstack((basis.row, basis.col))
        Psi = torch.sparse_coo_tensor(indices.copy(),
                                      basis.data.astype(np.float32).copy(),
                                      requires_grad=False)

        # Psi.requires_grad = False
        self.input_dim = Psi.shape[0]
        if imputer is None:
            self.imputer = lambda x: x + 1  # default: add a pseudocount
        else:
            self.imputer = imputer  # keep a user-supplied imputer instead of ignoring it

        if encoder_depth > 1:
            self.first_encoder = nn.Linear(self.input_dim,
                                           hidden_dim,
                                           bias=self.bias)
            num_encoder_layers = encoder_depth
            layers = []
            layers.append(self.first_encoder)
            for layer_i in range(num_encoder_layers - 1):
                layers.append(nn.Softplus())
                layers.append(nn.Linear(hidden_dim, hidden_dim,
                                        bias=self.bias))
            self.encoder = nn.Sequential(*layers)

            # initialize every linear layer with small Gaussian weights
            for encoder_layer in self.encoder:
                if isinstance(encoder_layer, nn.Linear):
                    encoder_layer.weight.data.normal_(0.0, init_scale)

        else:
            self.encoder = nn.Linear(self.input_dim,
                                     hidden_dim,
                                     bias=self.bias)
            self.encoder.weight.data.normal_(0.0, init_scale)

        self.decoder = nn.Linear(hidden_dim, self.input_dim, bias=False)
        self.variational_logvars = nn.Parameter(torch.zeros(hidden_dim))
        self.log_sigma_sq = nn.Parameter(torch.tensor(0.01))
        self.eta = nn.Parameter(torch.zeros(batch_size, self.input_dim))
        self.eta.data.normal_(0.0, init_scale)
        self.decoder.weight.data.normal_(0.0, init_scale)
        zI = torch.ones(self.hidden_dim).to(self.eta.device)
        zm = torch.zeros(self.hidden_dim).to(self.eta.device)
        self.register_buffer('Psi', Psi)
        self.register_buffer('zI', zI)
        self.register_buffer('zm', zm)
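
For reference, the encoder_depth > 1 branch above is equivalent to the stack below. This is an illustrative sketch with made-up sizes, not code from the original project; note that self.input_dim is already D - 1 once the basis has been built.

import torch.nn as nn

ilr_dim, hidden_dim = 99, 10  # e.g. D = 100 taxa -> 99 ILR coordinates

# Equivalent layout for encoder_depth = 3: an input layer followed by
# (Softplus, Linear) pairs; the original code additionally re-initializes
# every Linear weight from N(0, init_scale).
encoder = nn.Sequential(
    nn.Linear(ilr_dim, hidden_dim, bias=True),      # first_encoder
    nn.Softplus(),
    nn.Linear(hidden_dim, hidden_dim, bias=True),
    nn.Softplus(),
    nn.Linear(hidden_dim, hidden_dim, bias=True),
)
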
Example #2
def get_basis(input_dim, basis=None):
    """Build the ILR balance basis Psi as a sparse (D - 1) x D tensor."""
    if basis is None:
        # No basis supplied: derive one from a random bifurcating tree
        tree = random_linkage(input_dim)
        basis = sparse_balance_basis(tree)[0].copy()
    indices = np.vstack((basis.row, basis.col))
    Psi = torch.sparse_coo_tensor(
        indices.copy(), basis.data.astype(np.float32).copy(),
        requires_grad=False).coalesce()
    return Psi
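
A quick usage sketch (assuming random_linkage and sparse_balance_basis from the project above are importable): the returned Psi is a coalesced sparse COO tensor of shape (D - 1) x D.

Psi = get_basis(input_dim=8)   # no basis given, so a random tree is used
print(Psi.shape)               # torch.Size([7, 8]): (D - 1) x D
print(Psi.is_sparse)           # True, and already coalesced
dense = Psi.to_dense()         # fine to densify for inspection at small D
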
Example #3
    def __init__(self,
                 input_dim,
                 hidden_dim,
                 init_scale=0.001,
                 use_analytic_elbo=True,
                 encoder_depth=1,
                 likelihood='gaussian',
                 basis=None,
                 bias=False):
        super(LinearVAE, self).__init__()
        self.bias = bias
        self.hidden_dim = hidden_dim
        self.likelihood = likelihood
        self.use_analytic_elbo = use_analytic_elbo

        if basis is None:
            tree = random_linkage(input_dim)
            basis = sparse_balance_basis(tree)[0].copy()
        indices = np.vstack((basis.row, basis.col))
        Psi = torch.sparse_coo_tensor(indices.copy(),
                                      basis.data.astype(np.float32).copy(),
                                      requires_grad=False)
        self.input_dim = Psi.shape[0]
        self.register_buffer('Psi', Psi)

        if encoder_depth > 1:
            self.first_encoder = nn.Linear(self.input_dim,
                                           hidden_dim,
                                           bias=self.bias)
            num_encoder_layers = encoder_depth
            layers = []
            layers.append(self.first_encoder)
            for layer_i in range(num_encoder_layers - 1):
                layers.append(nn.Softplus())
                layers.append(nn.Linear(hidden_dim, hidden_dim,
                                        bias=self.bias))
            self.encoder = nn.Sequential(*layers)

            # initialize every linear layer with small Gaussian weights
            for encoder_layer in self.encoder:
                if isinstance(encoder_layer, nn.Linear):
                    encoder_layer.weight.data.normal_(0.0, init_scale)

        else:
            self.encoder = nn.Linear(self.input_dim,
                                     hidden_dim,
                                     bias=self.bias)
            self.encoder.weight.data.normal_(0.0, init_scale)

        self.decoder = nn.Linear(hidden_dim, self.input_dim, bias=self.bias)
        self.imputer = lambda x: x + 1
        self.variational_logvars = nn.Parameter(torch.zeros(hidden_dim))
        self.log_sigma_sq = nn.Parameter(torch.tensor(0.0))
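
A naming detail shared by Examples #1 and #3: the constructor argument input_dim is the number of taxa D, but self.input_dim is reset to Psi.shape[0] = D - 1, so the encoder and decoder operate on ILR coordinates. An illustrative construction, assuming LinearVAE and its helpers are importable as above:

vae = LinearVAE(input_dim=100, hidden_dim=5)
print(vae.Psi.shape)   # torch.Size([99, 100])
print(vae.input_dim)   # 99, i.e. D - 1, not the D that was passed in
print(vae.encoder)     # Linear(in_features=99, out_features=5, bias=False)
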
Example #4
def multinomial_bioms(k, D, N, M, min_sv=0.11, max_sv=5.0, sigma_sq=0.1):
    """ Simulates biom tables from multinomial.

    Parameters
    ----------
    k : int
       Number of latent dimensions.
    D : int
       Number of microbes.
    N : int
       Number of samples.
    M : int
       Average sequencing depth.
    min_sv : float
       Smallest eigenvalue used to construct the loading matrix W.
    max_sv : float
       Largest eigenvalue used to construct the loading matrix W.
    sigma_sq : float
       Variance of the Gaussian noise added to eta in ILR space.

    Returns
    -------
    dict of np.array
       Ground truth parameters.
    """
    dims, hdims, total = D, k, N
    eigs = min_sv + (max_sv - min_sv) * np.linspace(0, 1, hdims)
    eigvectors = ortho_group.rvs(dims - 1)[:, :hdims]
    W = np.matmul(eigvectors, np.diag(np.sqrt(eigs - sigma_sq)))
    sigma = np.sqrt(sigma_sq)
    z = np.random.normal(size=(total, hdims))
    eta = np.random.normal(np.matmul(z, W.T), sigma).astype(np.float32)
    tree = random_linkage(D)
    Psi = _balance_basis(tree)[0]
    prob = closure(np.exp(eta @ Psi))
    depths = np.random.poisson(M, size=N)
    Y = np.vstack([np.random.multinomial(depths[i], prob[i])
                   for i in range(N)])
    return dict(
        sigma=sigma,
        W=W,
        Psi=Psi,
        tree=tree,
        eta=eta,
        z=z,
        Y=Y,
        depths=depths,
        eigs=eigs,
        eigvectors=eigvectors
    )
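
An illustrative call (not part of the original source) to make the output shapes concrete, assuming multinomial_bioms and its helpers are importable:

sim = multinomial_bioms(k=3, D=50, N=100, M=5000)
print(sim['Y'].shape)        # (100, 50): count table, samples x microbes
print(sim['W'].shape)        # (49, 3): ILR-space loadings, (D - 1) x k
print(sim['eta'].shape)      # (100, 49): latent ILR coordinates per sample
print(sim['depths'].mean())  # roughly M = 5000 reads per sample
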
Example #5
def multinomial_batch_bioms(k, D, N, M, C=2,
                            min_sv=0.11, max_sv=5.0, sigma_sq=0.1):
    """ Simulates biom tables from multinomial with batch effects

    Parameters
    ----------
    k : int
       Number of latent dimensions.
    D : int
       Number of microbes.
    N : int
       Number of samples.
    M : int
       Average sequencing depth.
    C : int
       Number of batches.
    min_sv : float
       Smallest eigenvalue used to construct the loading matrix W.
    max_sv : float
       Largest eigenvalue used to construct the loading matrix W.
    sigma_sq : float
       Variance of the Gaussian noise added to eta in ILR space.

    Returns
    -------
    dict of np.array
       Ground truth parameters.
    """
    dims, hdims, total = D, k, N
    eigs = min_sv + (max_sv - min_sv) * np.linspace(0, 1, hdims)
    eigvectors = ortho_group.rvs(dims - 1)[:, :hdims]
    W = np.matmul(eigvectors, np.diag(np.sqrt(eigs - sigma_sq)))
    sigma = np.sqrt(sigma_sq)
    z = np.random.normal(size=(total, hdims))
    eta = np.random.normal(np.matmul(z, W.T), sigma).astype(np.float32)
    # Create ILR basis
    tree = random_linkage(D)
    Psi = _balance_basis(tree)[0]
    # add batch effects
    alpha = np.abs(np.random.normal(0, 0.5, size=D))
    alphaILR = np.abs(Psi) @ alpha  # variances must always be positive
    m = np.zeros(D - 1)
    B = np.random.multivariate_normal(m, np.diag(alphaILR), size=C)
    batch_idx = np.random.randint(C, size=N)
    eta = np.vstack([eta[i] + B[batch_idx[i]] for i in range(N)])
    # Convert latent variables to observed counts
    prob = closure(np.exp(eta @ Psi))
    depths = np.random.poisson(M, size=N)
    Y = np.vstack([np.random.multinomial(depths[i], prob[i])
                   for i in range(N)])
    return dict(
        sigma=sigma,
        W=W,
        Psi=Psi,
        tree=tree,
        eta=eta,
        z=z,
        Y=Y,
        alpha=alpha,
        alphaILR=alphaILR,
        B=B,
        batch_idx=batch_idx,
        depths=depths,
        eigs=eigs,
        eigvectors=eigvectors
    )
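
The batch-effect version only adds the offset terms; a sketch of how they line up, under the same assumptions as the previous example:

sim = multinomial_batch_bioms(k=3, D=50, N=100, M=5000, C=4)
print(sim['B'].shape)          # (4, 49): one ILR-space offset per batch
print(sim['batch_idx'].shape)  # (100,): batch assignment of each sample
# Each sample's eta is shifted by its batch offset before counts are drawn:
#   eta[i] + B[batch_idx[i]]
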