Пример #1
0
def test_cortex(save_path):
    """Smoke-test VAE, SCANVI and classifier training on the Cortex dataset.

    Each model is trained for a single epoch and a series of posterior
    utilities is invoked. The only explicit assertions are on the shapes of
    the arrays returned by ``generate_parameters`` and ``generate``.

    ``save_path`` is forwarded to ``CortexDataset`` and to
    ``imputation_benchmark``.
    """
    cortex = CortexDataset(save_path=save_path)

    # ---- Unsupervised VAE -------------------------------------------------
    vae = VAE(cortex.nb_genes, cortex.n_batches)
    vae_trainer = UnsupervisedTrainer(
        vae, cortex, train_size=0.5, use_cuda=use_cuda
    )
    vae_trainer.train(n_epochs=1)

    # The posteriors are deliberately re-read from the trainer on every use
    # instead of being cached in a local variable.
    vae_trainer.train_set.reconstruction_error()
    vae_trainer.train_set.differential_expression_stats()
    for correlation in ("pearson", "spearman"):
        vae_trainer.train_set.generate_feature_correlation_matrix(
            n_samples=2, correlation_type=correlation
        )
    vae_trainer.train_set.imputation(n_samples=1)
    vae_trainer.test_set.imputation(n_samples=5)

    # Corrupt (binomial, then the default corruption), retrain one epoch,
    # then undo the corruption.
    vae_trainer.corrupt_posteriors(corruption="binomial")
    vae_trainer.corrupt_posteriors()
    vae_trainer.train(n_epochs=1)
    vae_trainer.uncorrupt_posteriors()

    vae_trainer.train_set.imputation_benchmark(
        n_samples=1,
        show_plot=False,
        title_plot="imputation",
        save_path=save_path,
    )
    vae_trainer.train_set.generate_parameters()

    # ---- Shapes returned by generate_parameters ---------------------------
    n_cells = len(vae_trainer.train_set.indices)
    n_genes = cortex.nb_genes
    n_samples = 3
    per_cell_shape = (n_cells, n_genes)
    per_sample_shape = (n_samples, n_cells, n_genes)

    dropout, means, dispersions = vae_trainer.train_set.generate_parameters()
    assert dropout.shape == per_cell_shape
    assert means.shape == per_cell_shape
    assert dispersions.shape == per_cell_shape

    dropout, means, dispersions = vae_trainer.train_set.generate_parameters(
        n_samples=n_samples
    )
    assert dropout.shape == per_sample_shape
    assert means.shape == per_sample_shape

    dropout, means, dispersions = vae_trainer.train_set.generate_parameters(
        n_samples=n_samples, give_mean=True
    )
    assert dropout.shape == per_cell_shape
    assert means.shape == per_cell_shape

    # ---- Sampling from a posterior built over every cell ------------------
    every_cell = np.arange(len(cortex))
    full = vae_trainer.create_posterior(vae, cortex, indices=every_cell)
    x_new, x_old = full.generate(n_samples=10)
    assert x_new.shape == (cortex.nb_cells, cortex.nb_genes, 10)
    assert x_old.shape == (cortex.nb_cells, cortex.nb_genes)

    vae_trainer.train_set.imputation_benchmark(
        n_samples=1,
        show_plot=False,
        title_plot="imputation",
        save_path=save_path,
    )

    # ---- SCANVI, joint semi-supervised training ---------------------------
    scanvi = SCANVI(cortex.nb_genes, cortex.n_batches, cortex.n_labels)
    scanvi_trainer = JointSemiSupervisedTrainer(
        scanvi, cortex, n_labelled_samples_per_class=3, use_cuda=use_cuda
    )
    scanvi_trainer.train(n_epochs=1)
    scanvi_trainer.labelled_set.accuracy()
    scanvi_trainer.full_dataset.reconstruction_error()

    # ---- SCANVI, alternate semi-supervised training -----------------------
    scanvi = SCANVI(cortex.nb_genes, cortex.n_batches, cortex.n_labels)
    scanvi_trainer = AlternateSemiSupervisedTrainer(
        scanvi, cortex, n_labelled_samples_per_class=3, use_cuda=use_cuda
    )
    scanvi_trainer.train(n_epochs=1, lr=1e-2)
    scanvi_trainer.unlabelled_set.accuracy()

    # Baseline classifiers on the raw labelled / unlabelled data.
    data_train, labels_train = scanvi_trainer.labelled_set.raw_data()
    data_test, labels_test = scanvi_trainer.unlabelled_set.raw_data()
    svc_grid = [{"C": [1], "kernel": ["linear"]}]
    rf_grid = [{"max_depth": [3], "n_estimators": [10]}]
    compute_accuracy_svc(
        data_train, labels_train, data_test, labels_test, param_grid=svc_grid
    )
    compute_accuracy_rf(
        data_train, labels_train, data_test, labels_test, param_grid=rf_grid
    )

    # ---- Stand-alone classifier -------------------------------------------
    classifier = Classifier(cortex.nb_genes, n_labels=cortex.n_labels)
    classifier_trainer = ClassifierTrainer(classifier, cortex)
    classifier_trainer.train(n_epochs=1)
    classifier_trainer.train_set.accuracy()
Пример #2
0
 def test_populate(self):
     """Build ``CortexDataset`` from ``tests/data`` and hand it to the
     ``unsupervised_training_one_epoch`` helper."""
     unsupervised_training_one_epoch(CortexDataset(save_path="tests/data"))
Пример #3
0
def to_tensor(x):
    """Convert a numpy array to a float32 pytorch tensor on ``torch_device``.

    The array is cast to ``float32``, wrapped with ``torch.from_numpy`` and
    then moved to the module-level ``torch_device``.
    """
    as_float32 = x.astype('float32')
    tensor = torch.from_numpy(as_float32)
    return tensor.to(torch_device)


def to_array(x):
    """Convert tensor-like or sparse-like input to a numpy array.

    * Objects exposing ``todense`` are densified and wrapped with
      ``np.array``.
    * Objects exposing ``cpu`` are converted via ``x.data.cpu().numpy()``.
    * Anything else is returned unchanged.
    """
    if hasattr(x, 'todense'):
        dense = x.todense()
        return np.array(dense)
    if not hasattr(x, 'cpu'):
        # Neither sparse-like nor tensor-like: hand the object back as is.
        return x
    on_cpu = x.data.cpu()
    return on_cpu.numpy()


# Load dataset
# CortexDataset and SAVE_DATA_PATH are defined outside this snippet.
cortex = CortexDataset(save_path=SAVE_DATA_PATH)
X = cortex.X
labels = cortex.cell_types
# n_labels is the number of entries in cortex.cell_types.
n_labels = len(labels)
# The flattened cortex.labels and n_labels are passed to one_hot (defined
# elsewhere) -- presumably producing a one-hot label matrix; TODO confirm.
Y = one_hot(cortex.labels.ravel(), n_labels)

# ===========================================================================
# scVI
# ===========================================================================
scvi = VAE(n_input=cortex.nb_genes,
           n_batch=0,
           n_labels=0,
           n_hidden=n_hidden,
           n_latent=n_latent,
           n_layers=n_layer,
           dispersion=dispersion,
Пример #4
0
def test_differential_expression(save_path):
    """Exercise posterior save/load and differential expression scoring.

    First part: trains a ``VAE`` on the Cortex dataset, round-trips the
    posterior through ``save_posterior`` / ``load_posterior``, samples
    scales, and runs ``differential_expression_score`` in "vanilla" and
    "change" modes, checking interval ordering and probability range.

    Second part: trains ``TOTALVI`` on the ``pbmc_10k_protein_v3`` 10X
    dataset and runs the same two scoring modes without further assertions.
    """
    # ---- scVI on Cortex ---------------------------------------------------
    dataset = CortexDataset(save_path=save_path)
    all_indices = np.arange(len(dataset))
    vae = VAE(dataset.nb_genes, dataset.n_batches)
    trainer = UnsupervisedTrainer(
        vae, dataset, train_size=0.5, use_cuda=use_cuda
    )
    trainer.train(n_epochs=2)
    post = trainer.create_posterior(
        vae, dataset, shuffle=False, indices=all_indices
    )

    # Save the posterior into a temporary directory and load it back with a
    # freshly constructed model.
    with tempfile.TemporaryDirectory() as temp_dir:
        posterior_save_path = os.path.join(temp_dir, "posterior_data")
        post = post.sequential(batch_size=3)
        post.save_posterior(posterior_save_path)
        fresh_vae = VAE(dataset.nb_genes, dataset.n_batches)
        new_post = load_posterior(
            posterior_save_path, model=fresh_vae, use_cuda=False
        )
    assert new_post.data_loader.batch_size == 3
    assert np.array_equal(new_post.indices, post.indices)
    assert np.array_equal(new_post.gene_dataset.X, post.gene_dataset.X)

    # Sample scale example
    sampled = post.scale_sampler(
        n_samples_per_cell=4, n_samples=None, selection=all_indices
    )
    px_scales = sampled["scale"]
    assert (
        px_scales.shape[1] == dataset.nb_genes
    ), "posterior scales should have shape (n_samples, n_genes)"

    # Differential expression different models
    idx_1 = [1, 2, 3]
    idx_2 = [4, 5, 6, 7]
    # Keyword arguments shared by the "vanilla" and "change" calls.
    shared_kwargs = dict(
        idx1=idx_1,
        idx2=idx_2,
        n_samples=10,
        use_permutation=True,
        M_permutation=100,
    )
    de_dataframe = post.differential_expression_score(
        mode="vanilla", **shared_kwargs
    )
    de_dataframe = post.differential_expression_score(
        mode="change", cred_interval_lvls=[0.5, 0.95], **shared_kwargs
    )
    print(de_dataframe.keys())
    # Each credible interval's lower bound must not exceed its upper bound.
    for low_col, high_col in (
        ("lfc_confidence_interval_0.5_min", "lfc_confidence_interval_0.5_max"),
        ("lfc_confidence_interval_0.95_min", "lfc_confidence_interval_0.95_max"),
    ):
        assert (de_dataframe[low_col] <= de_dataframe[high_col]).all()

    # DE estimation example
    de_probabilities = de_dataframe.loc[:, "proba_de"]
    within_unit_interval = (de_probabilities >= 0.0) & (de_probabilities <= 1.0)
    assert within_unit_interval.all()

    # ---- Test totalVI DE --------------------------------------------------
    sp = os.path.join(save_path, "10X")
    dataset = Dataset10X(dataset_name="pbmc_10k_protein_v3", save_path=sp)
    all_indices = np.arange(len(dataset))
    vae = TOTALVI(
        dataset.nb_genes, len(dataset.protein_names), n_batch=dataset.n_batches
    )
    trainer = TotalTrainer(
        vae,
        dataset,
        train_size=0.5,
        use_cuda=use_cuda,
        early_stopping_kwargs=None,
    )
    trainer.train(n_epochs=2)
    post = trainer.create_posterior(
        vae,
        dataset,
        shuffle=False,
        indices=all_indices,
        type_class=TotalPosterior,
    )

    # Differential expression different models
    idx_1 = [1, 2, 3]
    idx_2 = [4, 5, 6, 7]
    shared_kwargs = dict(
        idx1=idx_1,
        idx2=idx_2,
        n_samples=10,
        use_permutation=True,
        M_permutation=100,
    )
    de_dataframe = post.differential_expression_score(
        mode="vanilla", **shared_kwargs
    )
    de_dataframe = post.differential_expression_score(
        mode="change", **shared_kwargs
    )
Пример #5
0
def test_cortex(save_path):
    """Smoke-test VAE, SCANVI and classifier training on the Cortex dataset.

    Every model is trained for one epoch and a handful of posterior
    utilities is called; the test contains no explicit assertions and
    passes as long as nothing raises.

    ``save_path`` is forwarded to ``CortexDataset`` and to
    ``imputation_benchmark``.
    """
    cortex = CortexDataset(save_path=save_path)

    # ---- Unsupervised VAE -------------------------------------------------
    vae = VAE(cortex.nb_genes, cortex.n_batches)
    vae_trainer = UnsupervisedTrainer(
        vae, cortex, train_size=0.5, use_cuda=use_cuda
    )
    vae_trainer.train(n_epochs=1)
    # Posteriors are re-read from the trainer on every use rather than cached.
    vae_trainer.train_set.ll()
    vae_trainer.train_set.differential_expression_stats()

    # Corrupt (binomial, then the default corruption), retrain, then restore.
    vae_trainer.corrupt_posteriors(corruption='binomial')
    vae_trainer.corrupt_posteriors()
    vae_trainer.train(n_epochs=1)
    vae_trainer.uncorrupt_posteriors()

    vae_trainer.train_set.imputation_benchmark(
        n_samples=1,
        show_plot=False,
        title_plot='imputation',
        save_path=save_path,
    )

    # ---- SCANVI, joint semi-supervised training ---------------------------
    scanvi = SCANVI(cortex.nb_genes, cortex.n_batches, cortex.n_labels)
    scanvi_trainer = JointSemiSupervisedTrainer(
        scanvi, cortex, n_labelled_samples_per_class=3, use_cuda=use_cuda
    )
    scanvi_trainer.train(n_epochs=1)
    scanvi_trainer.labelled_set.accuracy()
    scanvi_trainer.full_dataset.ll()

    # ---- SCANVI, alternate semi-supervised training -----------------------
    scanvi = SCANVI(cortex.nb_genes, cortex.n_batches, cortex.n_labels)
    scanvi_trainer = AlternateSemiSupervisedTrainer(
        scanvi, cortex, n_labelled_samples_per_class=3, use_cuda=use_cuda
    )
    scanvi_trainer.train(n_epochs=1, lr=1e-2)
    scanvi_trainer.unlabelled_set.accuracy()

    # Baseline classifiers on the raw labelled / unlabelled data.
    data_train, labels_train = scanvi_trainer.labelled_set.raw_data()
    data_test, labels_test = scanvi_trainer.unlabelled_set.raw_data()
    svc_grid = [{'C': [1], 'kernel': ['linear']}]
    rf_grid = [{'max_depth': [3], 'n_estimators': [10]}]
    compute_accuracy_svc(
        data_train, labels_train, data_test, labels_test, param_grid=svc_grid
    )
    compute_accuracy_rf(
        data_train, labels_train, data_test, labels_test, param_grid=rf_grid
    )

    # ---- Stand-alone classifier -------------------------------------------
    classifier = Classifier(cortex.nb_genes, n_labels=cortex.n_labels)
    classifier_trainer = ClassifierTrainer(classifier, cortex)
    classifier_trainer.train(n_epochs=1)
    classifier_trainer.train_set.accuracy()
Пример #6
0
def test_fish_rna():
    """Run the smFISH / scRNA-seq benchmark helper on default-path datasets.

    The Cortex dataset is built with the smFISH gene names as ``genes_fish``,
    an empty ``genes_to_keep`` list and ``additional_genes=50``.
    """
    fish = SmfishDataset()
    seq = CortexDataset(
        genes_fish=fish.gene_names, genes_to_keep=[], additional_genes=50
    )
    # NOTE(review): the helper is spelled ``benchamrk_fish_scrna`` here; the
    # import is not visible, so confirm this matches the imported symbol.
    benchamrk_fish_scrna(seq, fish)
Пример #7
0
def test_fish_rna(save_path):
    """Run ``benchmark_fish_scrna`` on datasets stored under ``save_path``.

    The Cortex dataset is built with the smFISH gene names as ``genes_fish``,
    an empty ``genes_to_keep`` list and ``additional_genes=50``.
    """
    fish = SmfishDataset(save_path)
    seq = CortexDataset(
        save_path=save_path,
        genes_fish=fish.gene_names,
        genes_to_keep=[],
        additional_genes=50,
    )
    benchmark_fish_scrna(seq, fish)
Пример #8
0
# Plotting flag. It is not referenced in the visible part of this script --
# presumably read by plotting code further down (TODO confirm).
show_plot = True

import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from scvi.dataset import CortexDataset, RetinaDataset
from scvi.models import *
from scvi.inference import UnsupervisedTrainer
import torch

import ssl

# Swap the default HTTPS context factory for the unverified one, so HTTPS
# connections that use the default context skip certificate verification.
# NOTE(review): this disables TLS verification process-wide -- confirm it is
# only intended for test/demo environments.
ssl._create_default_https_context = ssl._create_unverified_context

# save_path is defined outside this snippet.
gene_dataset = CortexDataset(save_path=save_path)

# Train for 400 epochs unless n_epochs_all (defined outside this snippet)
# provides an override.
n_epochs = 400 if n_epochs_all is None else n_epochs_all
lr = 1e-3
use_batches = False
use_cuda = True

# Multiplying by the boolean use_batches zeroes n_batch when use_batches is
# False (assumes n_batches is numeric -- TODO confirm).
vae = VAE(gene_dataset.nb_genes, n_batch=gene_dataset.n_batches * use_batches)
trainer = UnsupervisedTrainer(vae,
                              gene_dataset,
                              train_size=0.75,
                              use_cuda=use_cuda,
                              frequency=5,
                              verbose=True)

# Run training with the epoch count and learning rate chosen above.
trainer.train(n_epochs=n_epochs, lr=lr)