Example #1
    def generate(
        self,
        n_samples: int = 100,
        genes: Union[list, np.ndarray] = None,
        batch_size: int = 64,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Create observation samples from the Posterior Predictive distribution

        :param n_samples: Number of required samples for each cell
        :param genes: Indices of genes of interest
        :param batch_size: Desired batch size used when generating the data

        :return: Tuple (x_new, x_old)
            Where x_old has shape (n_cells, n_genes)
            Where x_new has shape (n_cells, n_genes, n_samples)
        """
        assert self.model.reconstruction_loss in ["zinb", "nb"]
        zero_inflated = self.model.reconstruction_loss == "zinb"
        x_old = []
        x_new = []
        for tensors in self.update({"batch_size": batch_size}):
            sample_batch, _, _, batch_index, labels = tensors
            outputs = self.model.inference(sample_batch,
                                           batch_index=batch_index,
                                           y=labels,
                                           n_samples=n_samples)
            px_r = outputs["px_r"]
            px_rate = outputs["px_rate"]
            px_dropout = outputs["px_dropout"]

            p = px_rate / (px_rate + px_r)
            r = px_r
            # Important remark: Gamma is parametrized by the rate = 1/scale!
            l_train = distributions.Gamma(concentration=r,
                                          rate=(1 - p) / p).sample()
            # Clamping as distributions objects can have buggy behaviors when
            # their parameters are too high
            l_train = torch.clamp(l_train, max=1e8)
            gene_expressions = distributions.Poisson(l_train).sample(
            )  # Shape : (n_samples, n_cells_batch, n_genes)
            if zero_inflated:
                p_zero = (1.0 + torch.exp(-px_dropout)).pow(-1)
                random_prob = torch.rand_like(p_zero)
                gene_expressions[random_prob <= p_zero] = 0

            gene_expressions = gene_expressions.permute(
                [1, 2, 0])  # Shape : (n_cells_batch, n_genes, n_samples)

            x_old.append(sample_batch.cpu())
            x_new.append(gene_expressions.cpu())

        x_old = torch.cat(x_old)  # Shape (n_cells, n_genes)
        x_new = torch.cat(x_new)  # Shape (n_cells, n_genes, n_samples)
        if genes is not None:
            gene_ids = self.gene_dataset.genes_to_index(genes)
            x_new = x_new[:, gene_ids, :]
            x_old = x_old[:, gene_ids]
        return x_new.numpy(), x_old.numpy()
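The Gamma-Poisson draw above is the standard way to sample a negative binomial with mean px_rate and inverse dispersion px_r. A minimal self-contained sketch of that construction (function and variable names here are illustrative, not from the repository):

import torch
from torch import distributions

def sample_nb(mu: torch.Tensor, theta: torch.Tensor, n_samples: int = 1) -> torch.Tensor:
    """Draw negative binomial counts via the Gamma-Poisson mixture.

    mu    : mean of the negative binomial (px_rate above)
    theta : inverse dispersion (px_r above)
    """
    p = mu / (mu + theta)
    # Gamma rate = (1 - p) / p = theta / mu, so the mixed Poisson rate has mean mu
    l = distributions.Gamma(concentration=theta, rate=(1 - p) / p).sample((n_samples,))
    l = torch.clamp(l, max=1e8)  # guard against extreme rates, as in the method above
    return distributions.Poisson(l).sample()

counts = sample_nb(torch.full((5,), 4.0), torch.full((5,), 2.0), n_samples=3)
print(counts.shape)  # torch.Size([3, 5])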
Example #2
    def generate_data(self):
        if self.n_comps == 2:
            cell_type = distributions.Bernoulli(probs=self.cat).sample(
                (self.n_cells,)
            )
        else:
            cell_type = torch.zeros(self.n_cells)
        z = torch.zeros((self.n_cells, self.n_latent)).float()
        for idx in range(z.shape[0]):
            z[idx, :] = self.dists[int(cell_type[idx])].sample()
        self.z = z
        rate = compute_rate(self.a_mat, self.b, z)
        self.h = rate

        gene_expressions = np.expand_dims(
            distributions.Poisson(rate=rate).sample(), axis=0
        )
        labels = np.expand_dims(cell_type, axis=0) if self.n_comps == 2 else None
        gene_names = np.arange(self.n_genes).astype(str)

        self.populate_from_per_batch_list(
            gene_expressions,
            labels_per_batch=labels,
            gene_names=gene_names,
        )
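compute_rate is not shown in this example; a common choice is an exponential link over an affine map of the latent z. The sketch below is a hypothetical stand-in whose signature mirrors the call above, not the project's actual implementation:

import torch

def compute_rate(a_mat: torch.Tensor, b: torch.Tensor, z: torch.Tensor) -> torch.Tensor:
    """Hypothetical rate function: exponential link over an affine map of z.

    a_mat : (n_genes, n_latent) loading matrix
    b     : (n_genes,) gene-specific offset
    z     : (n_cells, n_latent) latent positions
    """
    return torch.exp(z @ a_mat.t() + b)  # (n_cells, n_genes), strictly positive

z = torch.randn(100, 10)
rate = compute_rate(torch.randn(200, 10) * 0.1, torch.zeros(200), z)
counts = torch.distributions.Poisson(rate).sample()  # (n_cells, n_genes) integer-valued counts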
Example #3
    def gen_data(self):
        # sample overall relative abundances of ASVs from a Dirichlet distribution
        self.ASV_rel_abundance = tdist.Dirichlet(torch.ones(
            self.numASVs)).sample()

        # sample spatial embedding of ASVs
        self.w = torch.zeros(self.numASVs, self.D)
        w_prior = tdist.MultivariateNormal(torch.zeros(self.D),
                                           torch.eye(self.D))

        for o in range(0, self.numASVs):
            self.w[o, :] = w_prior.sample()

        self.data = torch.zeros(self.numParticles, self.numASVs)

        num_nonempty = 0

        mu_prior = tdist.MultivariateNormal(torch.zeros(self.D),
                                            torch.eye(self.D))
        rad_prior = tdist.LogNormal(torch.tensor([self.mu_rad]),
                                    torch.tensor([self.mu_std]))

        # replace with neg bin prior
        num_reads_prior = tdist.Poisson(
            torch.tensor([self.avgNumReadsParticle]))

        while (num_nonempty < self.numParticles):
            # sample center
            mu = mu_prior.sample()
            rad = rad_prior.sample()

            zr = torch.zeros(1, self.numASVs, dtype=torch.float64)
            for o in range(0, self.numASVs):
                p = mu - self.w[o, :]
                p = torch.pow(p, 2.0) / rad
                p = (torch.sum(p)).sqrt()
                zr[0, o] = unitboxcar(p, 0.0, 2.0, self.step_approx)

            if torch.sum(zr) > 0.95:
                particle = Particle(mu, self)
                particle.zr = zr
                self.particles.append(particle)

                # renormalize particle abundances
                rn = self.ASV_rel_abundance * zr
                rn = rn / torch.sum(rn)

                # sample relative abundances for particle
                part_rel_abundance = tdist.Dirichlet(rn * self.conc).sample()

                # sample number of reads for particle
                # (replace w/ neg bin instead of Poisson)
                num_reads = num_reads_prior.sample().long().item()
                particle.total_reads = num_reads

                particle.reads = tdist.Multinomial(
                    num_reads, probs=part_rel_abundance).sample()

                num_nonempty += 1
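The per-particle read generation above boils down to reweighting the global composition by the spatial inclusion weights, renormalizing, and drawing Dirichlet-Multinomial reads. A stand-alone sketch of just that step, with toy sizes and illustrative parameter values:

import torch
import torch.distributions as tdist

num_asvs, conc, num_reads = 8, 50.0, 200
rel_abundance = tdist.Dirichlet(torch.ones(num_asvs)).sample()  # global composition
zr = 0.1 + 0.9 * torch.rand(num_asvs)                           # spatial inclusion weights in (0.1, 1]

rn = rel_abundance * zr
rn = rn / rn.sum()                                              # renormalized composition for this particle
part_rel_abundance = tdist.Dirichlet(rn * conc).sample()        # particle-level composition
reads = tdist.Multinomial(num_reads, probs=part_rel_abundance).sample()
print(reads.sum())  # tensor(200.)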
Example #4
File: NBNTM.py  Project: mxiny/NB-NTM
    def sample_poisson(self, lam):
        # Draw a Poisson(lam) sample without tracking gradients, then standardize it;
        # rebuilding z = sqrt(lam) * eps + lam below lets gradients flow through lam.
        with torch.no_grad():
            sample = distributions.Poisson(lam).sample()
            eps = (sample - lam) / torch.sqrt(lam + 1e-12)

        z = torch.sqrt(lam + 1e-12) * eps + lam

        return z
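The point of the no_grad block above is that the Poisson draw itself is not differentiable; standardizing the sample and rebuilding z from lam gives a surrogate with the same value whose gradient flows into lam. A minimal check of that behavior:

import torch
from torch import distributions

lam = torch.tensor([3.0, 10.0], requires_grad=True)

with torch.no_grad():
    sample = distributions.Poisson(lam).sample()
    eps = (sample - lam) / torch.sqrt(lam + 1e-12)   # eps is treated as a constant

z = torch.sqrt(lam + 1e-12) * eps + lam              # numerically equal to the sample
z.sum().backward()
print(lam.grad)                                      # finite gradients, so lam is trainable through z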
Example #5
    def __init__(self, in_features: int, out_channels: int, num_repetitions: int = 1, dropout=0.0):
        """Creat a poisson layer.

        Args:
            out_channels: Number of parallel representations for each input feature.
            in_features: Number of input features.

        """
        super().__init__(in_features, out_channels, num_repetitions, dropout)
        self.rate = nn.Parameter(torch.rand(1, in_features, out_channels, num_repetitions))
        self.poisson = dist.Poisson(rate=self.rate)
Example #6
    def __init__(self, multiplicity, in_features, dropout=0.0):
        """Creat a poisson layer.

        Args:
            multiplicity: Number of parallel representations for each input feature.
            in_features: Number of input features.

        """
        super().__init__(multiplicity, in_features, dropout)
        self.rate = nn.Parameter(torch.rand(1, in_features, multiplicity))
        self.poisson = dist.Poisson(rate=self.rate)
Example #7
    def generate_leaves(
        self,
        n_samples: int = 100,
        batch_size: int = 128,
    ):
        """Create observation samples from the Posterior Predictive distribution

        Parameters
        ----------
        n_samples
            Number of required samples for each cell
        batch_size
            Desired batch size used when generating the data

        Returns
        -------
        x_new : :py:class:`numpy.ndarray`
            array with shape (n_cells, n_genes, n_samples)
        x_old : :py:class:`numpy.ndarray`
            array with shape (n_cells, n_genes)

        """
        assert self.model.reconstruction_loss in ["nb", "poisson"]
        x_old = []
        x_new = []
        for tensors in self.update({"batch_size": len(self.barcodes)}):
            sample_batch, _, _, batch_index, labels = tensors
            outputs = self.model.inference(sample_batch)
            px_r = outputs["px_r"]
            px_rate = outputs["px_rate"]

            if self.model.reconstruction_loss == "poisson":
                l_train = px_rate
                l_train = torch.clamp(l_train, max=1e8)
                dist = distributions.Poisson(
                    l_train)  # Shape : (n_samples, n_cells_batch, n_genes)
            elif self.model.reconstruction_loss == "nb":
                dist = NegativeBinomial(mu=px_rate, theta=px_r)
            else:
                raise ValueError(
                    "{} reconstruction error not handled right now".format(
                        self.model.reconstruction_loss))
            gene_expressions = dist.sample(
            )  #.permute([1, 2, 0])  # Shape : (n_cells_batch, n_genes, n_samples)

            x_old.append(sample_batch.cpu())
            x_new.append(gene_expressions.cpu())

        x_old = torch.cat(x_old)  # Shape (n_cells, n_genes)
        x_new = torch.cat(x_new)  # Shape (n_cells, n_genes, n_samples)
        return x_new.numpy(), x_old.numpy()
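NegativeBinomial(mu=..., theta=...) above is a mean/inverse-dispersion parameterization defined by the project, not torch's built-in class, which uses total_count and probs/logits. A hedged sketch of the conversion under that assumption:

import torch
from torch import distributions

def nb_from_mean_dispersion(mu: torch.Tensor, theta: torch.Tensor) -> distributions.NegativeBinomial:
    """torch's NegativeBinomial with total_count = theta and success logits log(mu) - log(theta).

    With p = mu / (mu + theta), the mean total_count * p / (1 - p) equals mu.
    """
    return distributions.NegativeBinomial(
        total_count=theta, logits=(mu + 1e-8).log() - (theta + 1e-8).log())

nb = nb_from_mean_dispersion(torch.full((4,), 5.0), torch.full((4,), 2.0))
print(nb.mean)             # ~5.0
print(nb.sample().shape)   # torch.Size([4])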
Example #8
    def decode_x(self, w, z):
        params = self.decoder_x(torch.cat((w, z), dim=-1))

        px_wz = []
        samples = []

        for indices in self.likelihood_partition:
            data_type = self.likelihood_partition[indices]

            params_subset = params[:, indices[0]:(indices[1] + 1)]

            if data_type == 'real':
                cov_diag = self.likelihood_params['lik_var'] * torch.ones_like(
                    params_subset).to(self.device)

                dist = D.Normal(loc=params_subset, scale=cov_diag.sqrt())

            elif data_type == 'categorical':
                dist = D.OneHotCategorical(logits=params_subset)
            elif data_type == 'binary':
                dist = D.Bernoulli(logits=params_subset)
            elif data_type == 'positive':
                lognormal_var = self.likelihood_params[
                    'lik_var_lognormal'] * torch.ones_like(params_subset).to(
                        self.device)

                dist = D.LogNormal(loc=params_subset,
                                   scale=lognormal_var.sqrt())
            elif data_type == 'count':
                positive_params_subset = F.softplus(params_subset)
                dist = D.Poisson(rate=positive_params_subset)
            elif data_type == 'binomial':
                num_trials = self.likelihood_params['binomial_num_trials']
                dist = D.Binomial(total_count=num_trials, logits=params_subset)
            elif data_type == 'ordinal':
                h = params_subset[:, 0:1]
                thetas = torch.cumsum(F.softplus(params_subset[:, 1:]), axis=1)

                prob_lessthans = torch.sigmoid(thetas - h)
                probs = torch.cat((prob_lessthans, torch.ones(len(prob_lessthans), 1)), axis=1) - \
                        torch.cat((torch.zeros(len(prob_lessthans), 1), prob_lessthans), axis=1)

                dist = D.OneHotCategorical(probs=probs)
            else:
                raise NotImplementedError

            samples.append(dist.sample())
            px_wz.append(dist)

        sample_x = torch.cat(samples, axis=1)

        return params, sample_x, px_wz
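The 'ordinal' branch above is a cumulative-link (ordered-logit) construction: one location h and an increasing set of cutpoints define K category probabilities as differences of sigmoids. A stand-alone sketch that checks the probabilities sum to one:

import torch
import torch.nn.functional as F
import torch.distributions as D

params_subset = torch.randn(6, 5)                               # 6 rows: 1 location + 4 cutpoint increments
h = params_subset[:, 0:1]
thetas = torch.cumsum(F.softplus(params_subset[:, 1:]), dim=1)  # strictly increasing cutpoints

prob_lessthans = torch.sigmoid(thetas - h)                      # P(Y <= k) for k = 1..K-1
probs = torch.cat((prob_lessthans, torch.ones(len(prob_lessthans), 1)), dim=1) - \
        torch.cat((torch.zeros(len(prob_lessthans), 1), prob_lessthans), dim=1)

print(probs.sum(dim=1))                                         # all ones
sample = D.OneHotCategorical(probs=probs).sample()              # one-hot ordinal draw per row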
Example #9
 def simulate_runs(self, coeffs, nruns=100, models=None):
     """Fits appropriation rule models and simulates optimal runs using those models
     """
     if models is None:
         models = self.fit_approp_rules(coeffs)
     runs = torch.ones(nruns, self.T+1) * self.Y0
     approps = torch.zeros(nruns, self.T+1)
     Nts = dist.Poisson(self.lambda_).sample((nruns, self.T)) if self.lambda_ else torch.zeros(nruns, self.T)
     ratios = torch.exp(self.mu - self.sigma**2 / 2 \
                        + dist.Normal(loc=Nts*self.m, scale=torch.sqrt(self.sigma**2 + Nts*self.s2)).sample())
     for t in range(1, self.T+1):
         approps[:, t] = models[t-1].predict(runs[:,t-1], ratios[:,t-1]).flatten()
         runs[:, t] = runs[:, t-1] * ratios[:, t-1] - approps[:, t]
     return runs, approps
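The transition ratios above follow a compound Poisson jump model: a Poisson count of jumps per step shifts the mean and inflates the variance of the log gross return. A self-contained sketch with toy parameter values:

import torch
import torch.distributions as dist

nruns, T = 4, 10
mu, sigma, lambda_, m, s2 = 0.05, 0.15, 0.5, -0.02, 0.01

Nts = dist.Poisson(lambda_).sample((nruns, T))                  # jump counts per step
log_shock = dist.Normal(loc=Nts * m,
                        scale=torch.sqrt(torch.tensor(sigma ** 2) + Nts * s2)).sample()
ratios = torch.exp(mu - sigma ** 2 / 2 + log_shock)             # gross growth factors

runs = torch.ones(nruns, T + 1)                                 # cumulative path starting at Y0 = 1
for t in range(1, T + 1):
    runs[:, t] = runs[:, t - 1] * ratios[:, t - 1]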
Example #10
 def fit_approp_rules(self, coeffs, npaths=100, lr=0.02, epochs_per_step=50, Model=AppropRule):
     """Estimates optimal appropriation rules to maximize the manager's utility.
     """
     print(f'fitting approp rules with {coeffs}...')
     time0 = time()
     # Draw transition ratios
     Nts = dist.Poisson(self.lambda_).sample((npaths, self.T)) if self.lambda_ else torch.zeros(npaths, self.T)
     ratios = torch.exp(self.mu - self.sigma**2 / 2 \
                        + dist.Normal(loc=Nts*self.m, scale=torch.sqrt(self.sigma**2 + Nts*self.s2)).sample())
     # calculate [fixed] future value discounts
     discounts = torch.exp(-self.rho * torch.arange(self.T+1))
     # calculate endpoints
     Yt = self.Y0 * ratios.prod(axis=1)
     models = [Model(self.max_approp) for _ in range(self.T)]
     for t in reversed(range(1, self.T+1)):
         def loss_fn(approp):
             utils = npaths * (self.theta * approp \
                               + poly_eval(Yt*(ratios[:, t-1] - self.ratio) - approp, coeffs))
             for s in range(t+1, self.T+1):
                 new_Yt = ratios[:, t:(s-1)].prod(axis=1) * Yt
                 approp = models[s-1].predict(new_Yt, ratios[:, s-1])
                 utils += discounts[s-t] * (self.theta * approp \
                                            + poly_eval(new_Yt * (ratios[:, s-1] - self.ratio) - approp, coeffs))
             return -utils.mean()
         optimizer = torch.optim.Adam(models[t-1].parameters(), lr=lr)
         losses = []
         for i in range(epochs_per_step):
             approp = models[t-1].forward(Yt, ratios[:, t-1])
             loss = loss_fn(approp)
             losses.append(loss.item())
             optimizer.zero_grad()
             loss.backward()
             optimizer.step()
             # exit prematurely if no progress is being made
             if i > 3 and (losses[-1] - losses[-4]) / abs(losses[-4]) > -0.001:
                 break
         if PLOT_LOSSES:
             plt.plot(losses)
     if PLOT_LOSSES:
         plt.show()
     print(f'models fit in {time()-time0:.2f} s')
     self.models = models
     return models
Example #11
def loglik_count(batch_data, list_type, theta, normalization_params):
    output = dict()
    epsilon = 1e-6

    # Data outputs
    data, missing_mask = batch_data
    missing_mask = missing_mask.float()

    est_lambda = theta
    est_lambda = torch.clamp(torch.nn.Softplus()(est_lambda), epsilon, 1e20)

    # log_p_x = -torch.sum(log_poisson_loss(targets=data, log_input=torch.log(est_lambda),
    #                                       compute_full_loss=True), 1)
    poisson = td.Poisson(est_lambda)
    log_p_x = poisson.log_prob(data).sum(1)

    output['log_p_x'] = torch.mul(log_p_x, missing_mask)
    output['log_p_x_missing'] = torch.mul(log_p_x, 1.0 - missing_mask)
    output['params'] = est_lambda
    output['samples'] = poisson.sample()

    return output
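A hypothetical call of loglik_count on a toy batch with one count variable and one missing entry (list_type and normalization_params are unused by this likelihood, so None is passed):

import torch

data = torch.tensor([[1.], [0.], [4.], [2.]])
missing_mask = torch.tensor([1., 1., 1., 0.])   # last entry treated as missing
theta = torch.randn_like(data)                  # decoder output before the softplus link

out = loglik_count((data, missing_mask), list_type=None, theta=theta, normalization_params=None)
print(out['log_p_x'])                           # per-row observed log-likelihood, zeroed where missing
print(out['samples'].shape)                     # Poisson draws from the estimated rates, shape (4, 1)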
Example #12
    def __init__(self, pi=[0.7], loc_reduce=0, n_cells=100, mu0_path='mu_0.npy', mu1_path='mu_2.npy',
                 separate_reduce=False, sig0_path='sigma_0.npy', sig1_path='sigma_2.npy'):
        super().__init__()

        current_dir = os.path.dirname(os.path.realpath(__file__))
        mu_0 = self.load_array(os.path.join(current_dir, mu0_path))
        mu_1 = self.load_array(os.path.join(current_dir, mu1_path))
        sigma_0 = self.load_array(os.path.join(current_dir, sig0_path))
        sigma_1 = self.load_array(os.path.join(current_dir, sig1_path))

        np.random.seed(0)
        torch.manual_seed(0)

        n_genes = len(mu_0)

        if not(separate_reduce):
            self.dist0 = distributions.MultivariateNormal(loc=mu_0-loc_reduce, covariance_matrix=sigma_0)
            self.dist1 = distributions.MultivariateNormal(loc=mu_1-loc_reduce, covariance_matrix=sigma_1)
        else:

            n_genes *= 2

            mu_0_new = torch.zeros((2*mu_0.shape[0],))
            mu_0_new[:mu_0.shape[0]] = mu_0
            mu_0_new[-mu_0.shape[0]:] = mu_0 - loc_reduce
            mu_0_new = mu_0_new.double()

            mu_1_new = torch.zeros((2*mu_1.shape[0],))
            mu_1_new[:mu_1.shape[0]] = mu_1
            mu_1_new[-mu_1.shape[0]:] = mu_1 - loc_reduce
            mu_1_new = mu_1_new.double()

            sigma_0 = sigma_0.cpu().numpy()
            sigma_1 = sigma_1.cpu().numpy()
            fac = 4
            sigma_0 = sigma_0 / fac
            sigma_1 = sigma_1 / fac
            np.fill_diagonal(sigma_0, fac*np.diag(sigma_0))
            np.fill_diagonal(sigma_1, fac* np.diag(sigma_1))
            sigma_0 = torch.tensor(sigma_0).double()
            sigma_1 = torch.tensor(sigma_1).double()


            sigma_0_new = torch.zeros((2*sigma_0.shape[0],2*sigma_0.shape[1]))
            sigma_0_new[:sigma_0.shape[0], :sigma_0.shape[1]] = sigma_0
            sigma_0_new[sigma_0.shape[0]:, sigma_0.shape[1]:] = sigma_0
            sigma_0_new = sigma_0_new.double()

            sigma_1_new = torch.zeros((2 * sigma_1.shape[0], 2 * sigma_1.shape[1]))
            sigma_1_new[:sigma_1.shape[0], :sigma_1.shape[1]] = sigma_1
            sigma_1_new[sigma_1.shape[0]:, sigma_1.shape[1]:] = sigma_1
            sigma_1_new = sigma_1_new.double()

            self.dist0 = distributions.MultivariateNormal(loc=mu_0_new, covariance_matrix=sigma_0_new)
            self.dist1 = distributions.MultivariateNormal(loc=mu_1_new, covariance_matrix=sigma_1_new)


        cell_type = distributions.Bernoulli(probs=torch.tensor(pi)).sample((n_cells,))
        zero_mask = (cell_type == 0).squeeze()
        one_mask = (cell_type == 1).squeeze()

        z = torch.zeros((n_cells, n_genes)).double()

        z[zero_mask, :] = self.dist0.sample((zero_mask.sum(),))
        z[one_mask, :] = self.dist1.sample((one_mask.sum(),))

        gene_expressions = distributions.Poisson(rate=z.exp()).sample().cpu().numpy()
        labels = cell_type.cpu().numpy()

        self.mask_zero_biological = (gene_expressions == 0)


        gene_expressions, batches = self.mask(gene_expressions, labels)

        gene_names = np.arange(n_genes).astype(str)

        keep_cells = (gene_expressions.sum(axis=1) > 0)
        gene_expressions = gene_expressions[keep_cells,:]
        if labels is not None:
            labels = labels[keep_cells]
        if batches is not None:
            batches = batches[keep_cells]

        self.populate_from_data(
            gene_expressions,
            labels=labels,
            gene_names=gene_names,
            batch_indices=batches,
        )
Example #13
    def generate(
        self,
        n_samples: int = 100,
        genes: Optional[np.ndarray] = None,
        batch_size: int = 256,
    ):
        """
        Create observation samples from the Posterior Predictive distribution

        :param n_samples: Number of required samples for each cell
        :param genes: Indices of genes of interest
        :param batch_size: Desired batch size used when generating the data

        :return: Tuple (rna_new, rna_old, atac_new, atac_old)
            Where rna_old / atac_old have shape (n_cells, n_features)
            Where rna_new / atac_new have shape (n_cells, n_features, n_samples)
        """
        assert self.model.reconstruction_loss in ["zinb", "zip"]
        zero_inflated = self.model.reconstruction_loss in ("zinb", "zip")

        rna_old = []
        rna_new = []
        atac_old = []
        atac_new = []
        for tensors in self.update({"batch_size": batch_size}):
            sample_batch, _, _, batch_index, labels = tensors
            outputs = self.model.inference(sample_batch,
                                           batch_index=batch_index,
                                           y=labels,
                                           n_samples=n_samples)
            p_rna_r = outputs["p_rna_r"]
            p_rna_rate = outputs["p_rna_rate"]
            p_rna_dropout = outputs["p_rna_dropout"]
            p_atac_mean = outputs["p_atac_mean"]
            p_atac_dropout = outputs["p_atac_dropout"]

            # Generating rna-seq data
            p = p_rna_rate / (p_rna_rate + p_rna_r)
            r = p_rna_r
            # Important remark: Gamma is parametrized by the rate = 1/scale!
            l_train_rna = distributions.Gamma(concentration=r,
                                              rate=(1 - p) / p).sample()
            # Clamping as distributions objects can have buggy behaviors when
            # their parameters are too high
            l_train_rna = torch.clamp(l_train_rna, max=1e8)
            gene_expressions = distributions.Poisson(l_train_rna).sample(
            )  # Shape : (n_samples, n_cells_batch, n_genes)

            # Generating ATAC-seq data
            l_train_atac = torch.clamp(p_atac_mean, max=1e2)
            atac_expressions = distributions.Poisson(l_train_atac).sample()

            # zero-inflate
            if zero_inflated:
                p_zero_rna = (1.0 + torch.exp(-p_rna_dropout)).pow(-1)
                random_prob_rna = torch.rand_like(p_zero_rna)
                gene_expressions[random_prob_rna <= p_zero_rna] = 0

                p_zero_atac = (1.0 + torch.exp(-p_atac_dropout)).pow(-1)
                random_prob_atac = torch.rand_like(p_zero_atac)
                atac_expressions[random_prob_atac <= p_zero_atac] = 0

            gene_expressions = gene_expressions.permute(
                [1, 2, 0])  # Shape : (n_cells_batch, n_genes, n_samples)
            atac_expressions = atac_expressions.permute([1, 2, 0])

            rna_old.append(sample_batch[0].cpu())
            rna_new.append(gene_expressions.cpu())
            atac_old.append(sample_batch[1].cpu())
            atac_new.append(atac_expressions.cpu())

        rna_old = torch.cat(rna_old)  # Shape (n_cells, n_genes)
        rna_new = torch.cat(rna_new)  # Shape (n_cells, n_genes, n_samples)
        atac_old = torch.cat(atac_old)
        atac_new = torch.cat(atac_new)
        if genes is not None:
            gene_ids = self.gene_dataset.genes_to_index(genes)
            rna_new = rna_new[:, gene_ids, :]
            rna_old = rna_old[:, gene_ids]
        return rna_new.numpy(), rna_old.numpy(), atac_new.numpy(), atac_old.numpy()
Example #14
def s2() -> RVIdentifier:
    return dist.Poisson(theta2())
Example #15
 def forward(self, x: torch.Tensor) -> dist.Poisson:  # type: ignore
     """Output a Poisson distribution parameterized by ``exp(x)``."""
     _validate_input(x)
     return dist.Poisson(torch.exp(x))
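A hedged usage sketch of the exp-link head above: the raw model output is interpreted as a log-rate, and training minimizes the negative log-likelihood of observed counts under the returned distribution:

import torch
import torch.distributions as dist

x = torch.randn(8, 3, requires_grad=True)          # raw outputs (log-rates)
targets = torch.poisson(torch.full((8, 3), 2.0))   # observed counts

poisson = dist.Poisson(torch.exp(x))               # what forward(x) above returns
nll = -poisson.log_prob(targets).mean()
nll.backward()                                     # gradients flow back into x / the network
print(x.grad.shape)                                # torch.Size([8, 3])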
Example #16
def main(args):
    print("Loading data...")
    teams, df = load_data()
    train = df[df["split"] == "train"]

    print("Starting inference...")
    samples = bm.GlobalNoUTurnSampler().infer(
        queries=[
            alpha(),
            home(),
            sd_att(),
            sd_def(),
            attack(),
            defend(),
        ],
        observations={
            s1(): torch.tensor(train["score1"].values),
            s2(): torch.tensor(train["score2"].values),
        },
        num_samples=args.num_samples,
        num_chains=args.num_chains,
        num_adaptive_samples=args.num_warmup,
    )

    samples = samples.to_xarray()
    fit = az.InferenceData(posterior=samples)

    print("Analyse posterior...")
    az.plot_forest(
        fit,
        backend="bokeh",
    )

    az.plot_trace(
        fit,
        backend="bokeh",
    )

    # Attack and defence
    quality = teams.copy()
    quality = quality.assign(
        attack=samples[attack()].mean(axis=(0, 1)),
        attacksd=samples[attack()].std(axis=(0, 1)),
        defend=samples[defend()].mean(axis=(0, 1)),
        defendsd=samples[defend()].std(axis=(0, 1)),
    )
    quality = quality.assign(
        attack_low=quality["attack"] - quality["attacksd"],
        attack_high=quality["attack"] + quality["attacksd"],
        defend_low=quality["defend"] - quality["defendsd"],
        defend_high=quality["defend"] + quality["defendsd"],
    )

    plot_quality(quality)

    # Predicted goals and table
    predict = df[df["split"] == "predict"]

    theta1 = (samples[alpha()].expand_dims("", axis=-1).values +
              samples[home()].expand_dims("", axis=-1).values +
              samples[attack()][:, :, predict["Home_id"]].values -
              samples[defend()][:, :, predict["Away_id"]].values)
    theta1 = torch.tensor(theta1.reshape(-1, theta1.shape[-1]))

    theta2 = (samples[alpha()].expand_dims("", axis=-1).values +
              samples[attack()][:, :, predict["Away_id"]].values -
              samples[defend()][:, :, predict["Home_id"]].values)
    theta2 = torch.tensor(theta2.reshape(-1, theta2.shape[-1]))

    score1 = np.array(dist.Poisson(torch.exp(theta1)).sample())
    score2 = np.array(dist.Poisson(torch.exp(theta2)).sample())

    predicted_full = predict.copy()
    predicted_full = predicted_full.assign(
        score1=score1.mean(axis=0).round(),
        score1error=score1.std(axis=0),
        score2=score2.mean(axis=0).round(),
        score2error=score2.std(axis=0),
    )

    predicted_full = train.append(
        predicted_full.drop(columns=["score1error", "score2error"]))

    print(score_table(df))
    print(score_table(predicted_full))
Example #17
  def forward(iota_xfull, iota_x, iota_y, mask_x, mask_y, batch_size, niw):
    tiled_iota_x = torch.Tensor.repeat(iota_x,[niw,1]); tiled_tiled_iota_x = torch.Tensor.repeat(tiled_iota_x,[niw,1])
    tiledmask_x = torch.Tensor.repeat(mask_x,[niw,1]); tiled_tiledmask_x = torch.Tensor.repeat(tiledmask_x,[niw,1])
    if not draw_miss: tiled_iota_xfull = torch.Tensor.repeat(iota_xfull,[niw,1])
    tiled_iota_y = torch.Tensor.repeat(iota_y,[niw,1]); tiled_tiled_iota_y = torch.Tensor.repeat(tiled_iota_y,[niw,1])
    tiledmask_y = torch.Tensor.repeat(mask_y,[niw,1]); tiled_tiledmask_y = torch.Tensor.repeat(tiledmask_y,[niw,1])
    if not draw_miss: tiled_iota_yfull = torch.Tensor.repeat(iota_yfull,[niw,1])
    
    ## uncorrelated covariates
    # p_x = td.Normal(loc=mu_x, scale=torch.nn.Softplus()(scale_x)+0.001)
    
    ## Correlated covariates (unstructured covariance structure)
    # p_x = td.multivariate_normal.MultivariateNormal(loc=mu_x,covariance_matrix=torch.nn.Softplus()(scale_x)+0.001)
    p_x = td.multivariate_normal.MultivariateNormal(loc=mu_x,covariance_matrix=torch.matmul(scale_x, scale_x.t()))  # multiply by transpose -> make it positive definite

    params_x = None; xm = iota_x; xm_flat = torch.Tensor.repeat(iota_x,[niw,1])  # if no missing x
    params_y = None; ym = iota_y; ym_flat = torch.Tensor.repeat(iota_y,[niw,1])

    ## NN_xm ## p(xm|xo,r)    (if missing in x detected)
    if miss_x:
      out_NN_xm = NN_xm(torch.cat([iota_x,mask_x],1))
      # bs x p -- > sample niw times
      qxmgivenxor = td.Normal(loc=out_NN_xm[..., :p],scale=torch.nn.Softplus()(out_NN_xm[..., p:(2*p)])+0.001)    ### condition contribution of this term in the ELBO by miss_x
      params_xm = {'mean':out_NN_xm[..., :p], 'scale':torch.nn.Softplus()(out_NN_xm[..., p:(2*p)])+0.001}
      if draw_miss: xm = qxmgivenxor.rsample([niw]); xm_flat = xm.reshape([niw*batch_size,p])
    else: 
      qxmgivenxor=None; params_xm=None; xm_flat = torch.Tensor.repeat(iota_x,[niw,1])
    # organize completed (sampled) xincluded for missingness model. observed values are not sampled
    if miss_x:
      if miss_y:
        tiled_xm_flat = torch.Tensor.repeat(xm_flat,[niw,1])
        xincluded = tiled_tiled_iota_x*(tiled_tiledmask_x) + tiled_xm_flat*(1-tiled_tiledmask_x)
      else:
        xincluded = tiled_iota_x*(tiledmask_x) + xm_flat*(1-tiledmask_x)
    else:
      xincluded = iota_x

    ## NN_ym ## p(ym|yo,x,r)   (if missing in y detected)    
    if miss_y:
      if not miss_x:
        out_NN_ym = NN_ym(torch.cat([iota_y, iota_x, mask_y],1))
        # bs x 1 --> sample niw times
      elif miss_x:
        out_NN_ym = NN_ym(torch.cat([tiled_iota_y, tiledmask_x*tiled_iota_x + (1-tiledmask_x)*xm_flat, tiledmask_y],1))
        # (niw*bs) x 1 --> sampled niw times
      if family=="Gaussian":
        qymgivenyor = td.Normal(loc=out_NN_ym[..., :1],scale=torch.nn.Softplus()(out_NN_ym[..., 1:2])+0.001)     ### condition contribution of this term in the ELBO by miss_y
        params_ym = {'mean':out_NN_ym[..., :1], 'scale':torch.nn.Softplus()(out_NN_ym[..., 1:2])+0.001}
      if draw_miss: ym = qymgivenyor.rsample([niw]); ym_flat = ym.reshape([-1,1])    # ym_flat is (niw*bs x 1) if no miss_x, and (niw*niw*bs x 1) if miss_x
    else:
      qymgivenyor=None; params_ym=None; ym_flat = torch.Tensor.repeat(iota_y,[niw,1])
    
    # organize completed (sampled) yincluded for missingness model. observed values are not sampled
    if miss_y:
      if miss_x:  yincluded = tiled_tiled_iota_y*(tiled_tiledmask_y) + ym_flat*(1-tiled_tiledmask_y)
      else:  yincluded = tiled_iota_y*(tiledmask_y) + ym_flat*(1-tiledmask_y)
    else:
      if miss_x:  yincluded = tiled_iota_y
      else:  yincluded = iota_y

    ## NN_y ##      p(y|x)
    out_NN_y = NN_y(xincluded)     # if miss_x and miss_y: this becomes niw*niw*bs x p, otherwise: niw*bs x p
    if family=="Gaussian":
      mu_y = invlink(link)(out_NN_y[..., 0]);  var_y = V(mu_y, torch.nn.Softplus()(alpha)+0.001, family)   # default: link="identity", family="Gaussian"
      pygivenx = td.Normal(loc = mu_y, scale = (var_y)**(1/2))    # scale = sd = var^(1/2)
      params_y = {'mean': mu_y.detach(), 'scale': (var_y.detach())**(1/2)}
    elif family=="Multinomial":
      probs = invlink(link)(out_NN_y[..., :C])
      pygivenx = td.OneHotCategorical(probs=probs)
      #print("probs:"); print(probs)
      #print("pygivenx (event_shape):"); print(pygivenx.event_shape)
      #print("pygivenx (batch_shape):"); print(pygivenx.batch_shape)
      params_y = {'probs': probs.detach()}
    elif family=="Poisson":
      lambda_y = invlink(link)(out_NN_y[..., 0])  # variance is the same as mean in Poisson
      pygivenx = td.Poisson(rate = lambda_y)
      params_y = {'lambda': lambda_y.detach()}

    #print(pygivenx.rsample().shape)

    ## NN_r ##   p(r|x,y,covars): always. Include option to specify covariates in X, y, and additional covars_miss
    # Organize covariates for missingness model (NN_r)
    if covars_r_y==1:
      if np.sum(covars_r_x)>0: covars_included = torch.cat([xincluded[:,covars_r_x==1], yincluded],1)
      else: covars_included = yincluded
    elif covars_r_y==0:
      if np.sum(covars_r_x)>0: covars_included = xincluded[:,covars_r_x==1]
      # else: IGNORABLE HERE. NO COVARIATES
    
    #print(covars_included.shape)
    #print(NN_r)
    if not Ignorable:
      if (covars): out_NN_r = NN_r(torch.cat([covars_included, covars_miss]))   # right now: just X in as covariates (Case 1)    (niw*niw*bs x p) for case 3, (niw*bs x p) for other cases
      else: out_NN_r = NN_r(covars_included)        # can additionally include covariates
      prgivenxy = td.Bernoulli(logits = out_NN_r)   # for just the features with missing values
      params_r = {'probs': torch.nn.Sigmoid()(out_NN_r).detach()}
    else: prgivenxy=None; params_r=None


    return xincluded, yincluded, p_x, qxmgivenxor, qymgivenyor, pygivenx, prgivenxy, params_xm, params_ym, params_y, params_r
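For reference, the family == "Poisson" branch above reduces to a log-link Poisson regression likelihood when invlink(link) is exp; a minimal isolated sketch under that assumption:

import torch
import torch.distributions as td

eta = torch.randn(16)                  # out_NN_y[..., 0]: linear predictor per observation
lambda_y = torch.exp(eta)              # invlink for a log link; the variance equals the mean
pygivenx = td.Poisson(rate=lambda_y)

y = pygivenx.sample()                  # simulated outcomes
log_lik = pygivenx.log_prob(y).sum()   # the p(y | x) term that enters the ELBO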
Example #18
    def __init__(
        self,
        pi=[0.7],
        n_cells=100,
        mu0_path="mu_0.npy",
        mu1_path="mu_2.npy",
        sig0_path="sigma_0.npy",
        sig1_path="sigma_2.npy",
        seed=42,
        n_genes=None,
        change_means=False,
        cuda_mcmc=False,
    ):
        super().__init__()
        torch.manual_seed(seed)
        assert len(pi) == 1
        self.probas = torch.tensor([1.0 - pi[0], pi[0]])
        self.logprobas = torch.log(self.probas)
        current_dir = os.path.dirname(os.path.realpath(__file__))
        self.mu_0 = self.load_array(os.path.join(current_dir, mu0_path),
                                    n_genes)
        self.mu_1 = self.load_array(os.path.join(current_dir, mu1_path),
                                    n_genes)

        n_genes = len(self.mu_0)
        if change_means:
            self.mu_0[:n_genes // 4] = self.mu_0[:n_genes // 4] / 1.5
            self.mu_0[n_genes // 4:n_genes //
                      2] = (self.mu_0[n_genes // 4:n_genes // 2] / 0.5)

        self.sigma_0 = self.load_array(os.path.join(current_dir, sig0_path),
                                       n_genes)
        self.sigma_1 = self.load_array(os.path.join(current_dir, sig1_path),
                                       n_genes)

        d1, d2 = self.sigma_1.shape
        assert d1 == d2
        self.sigma_0 = self.sigma_0 + 2e-6 * torch.eye(
            d2, d2, dtype=self.sigma_0.dtype)
        self.sigma_1 = self.sigma_1 + 2e-6 * torch.eye(
            d2, d2, dtype=self.sigma_1.dtype)

        self.mus = torch.stack([self.mu_0, self.mu_1]).float()
        self.sigmas = torch.stack([self.sigma_0, self.sigma_1]).float()
        if cuda_mcmc:
            # .cuda() is not in-place; reassign to actually move the tensors
            self.mus = self.mus.cuda()
            self.sigmas = self.sigmas.cuda()
            self.probas = self.probas.cuda()
            self.logprobas = self.logprobas.cuda()
        self.dist0 = distributions.MultivariateNormal(
            loc=self.mu_0, covariance_matrix=self.sigma_0)
        self.dist1 = distributions.MultivariateNormal(
            loc=self.mu_1, covariance_matrix=self.sigma_1)
        self.dist_x = distributions.Poisson

        cell_type = distributions.Bernoulli(probs=torch.tensor(pi)).sample(
            (n_cells, ))
        zero_mask = (cell_type == 0).squeeze()
        one_mask = ~zero_mask  # (cell_type == 1).squeeze()

        z = torch.zeros((n_cells, n_genes)).double()
        z[zero_mask] = self.dist0.sample((zero_mask.sum(), ))
        z[one_mask] = self.dist1.sample((one_mask.sum(), ))
        print(z.min(), z.max())
        rate = torch.clamp(z.exp(), max=1e5)
        gene_expressions = np.expand_dims(
            distributions.Poisson(rate=rate).sample(), axis=0)
        labels = np.expand_dims(cell_type, axis=0)
        gene_names = np.arange(n_genes).astype(str)

        print("Dataset shape: ", gene_expressions.shape)
        print("Gene expressions bounds: ", gene_expressions.min(),
              gene_expressions.max())

        self.populate_from_per_batch_list(
            gene_expressions,
            labels_per_batch=labels,
            gene_names=gene_names,
        )