示例#1
0
    def test_populate_from_datasets_with_measurments(self):
        """Check how a "dev" cell measurement is merged across two datasets.

        The two inputs carry a "dev" measurement with partly different column
        names. The merged dataset is expected to expose only the names found
        in both inputs, in the order asserted below, with the values
        realigned to that order.
        """
        counts = np.random.randint(1, 5, size=(5, 10))
        genes = np.array(["gene_%d" % idx for idx in range(10)])

        # Column j of each measurement holds the constant j, so a merged row
        # shows which source column every value was taken from.
        names_first = ["gabou", "achille", "pedro", "oclivio", "gayoso"]
        measurement_first = CellMeasurement(
            name="dev",
            data=np.ones((5, 5)) * np.arange(0, 5),
            columns_attr_name="dev_names",
            columns=names_first,
        )
        names_second = ["gabou", "oclivio", "achille", "pedro"]
        measurement_second = CellMeasurement(
            name="dev",
            data=np.ones((5, 4)) * np.arange(0, 4),
            columns_attr_name="dev_names",
            columns=names_second,
        )

        first = GeneExpressionDataset()
        second = GeneExpressionDataset()
        first.populate_from_data(counts, Ys=[measurement_first], gene_names=genes)
        second.populate_from_data(counts, Ys=[measurement_second], gene_names=genes)

        merged = GeneExpressionDataset()
        merged.populate_from_datasets([first, second])

        for attribute in ("dev", "dev_names"):
            self.assertTrue(hasattr(merged, attribute))

        self.assertListEqual(
            merged.dev_names.tolist(), ["achille", "gabou", "oclivio", "pedro"]
        )
        # Row 0 comes from the first dataset, row 5 from the second one.
        self.assertListEqual(merged.dev[0].tolist(), [1, 0, 3, 2])
        self.assertListEqual(merged.dev[5].tolist(), [2, 0, 1, 3])
示例#2
0
def create_datasets():
    """Build two small synthetic datasets with partly overlapping genes.

    Dataset "test_a" has 100 cells over genes A-E and four cell types;
    dataset "test_b" has 100 cells over genes B, F, A and three cell types.
    Only the cell type "alpha" appears in both. All random draws come from a
    single ``RandomState(0)`` so the fixture is identical on every call.

    Returns:
        tuple: ``(dataset_a, dataset_b)`` as populated
        ``GeneExpressionDataset`` instances.
    """
    rs = RandomState(0)
    data_a = np.sort(rs.normal(0, 10, 500)).astype(int).reshape(100, 5)
    gene_names_a = list("ABCDE")
    cell_types_a = ["alpha", "beta", "gamma", "delta"]
    labels_a = rs.choice(np.arange(len(cell_types_a)), data_a.shape[0])

    data_b = np.sort(rs.normal(100, 10, 300)).astype(int).reshape(100, 3)
    gene_names_b = list("BFA")
    cell_types_b = ["alpha", "epsilon", "rho"]
    labels_b = rs.choice(np.arange(len(cell_types_b)), data_b.shape[0])
    batch_indices_b = rs.choice(np.arange(5), size=data_b.shape[0])

    # Use the seeded generator rather than the global ``np.random`` state so
    # the batch assignment is reproducible. It is drawn last on purpose: the
    # arrays above then consume the seeded stream in the same order as
    # before and keep their values.
    batch_indices_a = rs.choice(np.arange(5), size=data_a.shape[0])

    dataset_a = GeneExpressionDataset()
    dataset_b = GeneExpressionDataset()
    dataset_a.populate_from_data(X=data_a,
                                 labels=labels_a,
                                 gene_names=gene_names_a,
                                 cell_types=cell_types_a,
                                 batch_indices=batch_indices_a)
    dataset_a.name = "test_a"

    dataset_b.populate_from_data(X=data_b,
                                 labels=labels_b,
                                 gene_names=gene_names_b,
                                 cell_types=cell_types_b,
                                 batch_indices=batch_indices_b)
    dataset_b.name = "test_b"
    return dataset_a, dataset_b
示例#3
0
    def test_dense_subsample_genes(self):
        """Each gene-selection mode should keep exactly the requested count."""
        batches = [np.random.randint(1, 5, size=(50, 26)) for _ in range(3)]

        def fresh_dataset():
            # Every mode starts from an untouched dataset built on the same
            # three dense batches.
            built = GeneExpressionDataset()
            built.populate_from_per_batch_list(batches)
            return built

        # Cell Ranger mode; the target is half of the initial gene count.
        cell_ranger = fresh_dataset()
        n_top = cell_ranger.nb_genes // 2
        cell_ranger.subsample_genes(new_n_genes=n_top, mode="cell_ranger")
        assert cell_ranger.nb_genes == n_top

        # Seurat v2 mode
        seurat_v2 = fresh_dataset()
        seurat_v2.subsample_genes(new_n_genes=n_top, mode="seurat_v2")
        assert seurat_v2.nb_genes == n_top

        # Seurat v3 mode
        seurat_v3 = fresh_dataset()
        seurat_v3.subsample_genes(new_n_genes=n_top, mode="seurat_v3")
        assert seurat_v3.nb_genes == n_top
示例#4
0
    def test_labels(self):
        """Labels should be stored as a (n_cells, 1) column.

        With 25 distinct labels 0..24 the stored value for cell 5 is expected
        to be 5; with a single repeated label value (5) the stored value is
        expected to be 0.
        """
        counts = np.ones((25, 10)) * 100

        # One distinct label per cell.
        distinct = GeneExpressionDataset()
        distinct.populate_from_data(counts, labels=np.array(range(25)))
        self.assertTupleEqual((25, 1), distinct.labels.shape)
        self.assertEqual(distinct.labels[5, 0], 5)

        # The same label for every cell.
        constant = GeneExpressionDataset()
        constant.populate_from_data(counts, labels=np.ones(25) * 5)
        self.assertTupleEqual(constant.labels.shape, (25, 1))
        self.assertEqual(constant.labels[5, 0], 0)
示例#5
0
    def test_populate_from_datasets_with_measurments(self):
        """Check intersection and union merging of a "dev" cell measurement.

        By default the merged dataset is expected to keep only the "dev"
        columns present in both inputs. With
        ``cell_measurement_intersection={"dev": False}`` it is expected to
        keep every column name from either input.
        """
        counts = np.random.randint(1, 5, size=(5, 10))
        genes = np.array(["gene_%d" % idx for idx in range(10)])

        # Column j of each measurement holds the constant j, so a merged row
        # shows which source column every value was taken from.
        names_first = ["gabou", "achille", "pedro", "oclivio", "gayoso"]
        measurement_first = CellMeasurement(
            name="dev",
            data=np.ones((5, 5)) * np.arange(0, 5),
            columns_attr_name="dev_names",
            columns=names_first,
        )
        names_second = ["gabou", "oclivio", "achille", "pedro"]
        measurement_second = CellMeasurement(
            name="dev",
            data=np.ones((5, 4)) * np.arange(0, 4),
            columns_attr_name="dev_names",
            columns=names_second,
        )

        first = GeneExpressionDataset()
        second = GeneExpressionDataset()
        first.populate_from_data(counts, Ys=[measurement_first], gene_names=genes)
        second.populate_from_data(counts, Ys=[measurement_second], gene_names=genes)

        # Default behaviour. The inputs are deep-copied so that both merges
        # in this test start from pristine datasets.
        intersected = GeneExpressionDataset()
        intersected.populate_from_datasets(
            [copy.deepcopy(first), copy.deepcopy(second)]
        )

        for attribute in ("dev", "dev_names"):
            self.assertTrue(hasattr(intersected, attribute))

        self.assertListEqual(
            intersected.dev_names.tolist(),
            ["achille", "gabou", "oclivio", "pedro"],
        )
        # Row 0 comes from the first dataset, row 5 from the second one.
        self.assertListEqual(intersected.dev[0].tolist(), [1, 0, 3, 2])
        self.assertListEqual(intersected.dev[5].tolist(), [2, 0, 1, 3])

        # Union of the "dev" columns; missing entries are filled with zeros.
        unioned = GeneExpressionDataset()
        unioned.populate_from_datasets(
            [copy.deepcopy(first), copy.deepcopy(second)],
            cell_measurement_intersection={"dev": False},
        )
        self.assertListEqual(
            unioned.dev_names.tolist(),
            ["achille", "gabou", "gayoso", "oclivio", "pedro"],
        )
        # Presumably mask[1][2] addresses the second dataset and the "gayoso"
        # column, which that dataset does not provide -- TODO confirm.
        batch_mask = unioned.get_batch_mask_cell_measurement("dev")
        self.assertEqual(batch_mask[1][2].astype(int), 0)
示例#6
0
    def test_collate_add(self):
        """Extra attributes requested from the collate function are returned.

        The collate function is built with "x_coords" and "proteins" as
        additional float32 attributes and is expected to return them after
        the five default outputs.
        """

        def cell_column():
            # Fresh (25, 1) column holding the cell index 0..24.
            return np.arange(0, 25).reshape((-1, 1))

        counts = np.ones((25, 2)) * cell_column()
        batch_indices = cell_column()
        x_coords = cell_column()
        # Protein value for cell i, column j is 1 + i + j.
        proteins = np.ones((25, 3)) + cell_column() + np.arange(0, 3)

        protein_measurement = CellMeasurement(
            name="proteins",
            data=proteins,
            columns_attr_name="protein_names",
            columns=["A", "B", "C"],
        )
        dataset = GeneExpressionDataset()
        dataset.populate_from_data(
            counts,
            batch_indices=batch_indices,
            cell_attributes_dict={"x_coords": x_coords},
            Ys=[protein_measurement],
        )

        extra_attributes = {"x_coords": np.float32, "proteins": np.float32}
        collate_fn = dataset.collate_fn_builder(
            add_attributes_and_types=extra_attributes
        )
        # Exactly seven outputs are expected; only the last two are checked.
        (_x, _mean, _var, _batch, _labels,
         x_coords_tensor, proteins_tensor) = collate_fn([1, 2])

        self.assertListEqual(x_coords_tensor.tolist(), [[1.0], [2.0]])
        self.assertListEqual(
            proteins_tensor.tolist(), [[2.0, 3.0, 4.0], [3.0, 4.0, 5.0]]
        )
示例#7
0
    def create_dataset(self, path):
        """Build a ``GeneExpressionDataset`` from an RDS file read through R.

        The file is loaded into the R variable ``sce`` (presumably a
        SingleCellExperiment object -- confirm). Its log counts are turned
        back into integer counts, cells whose total count is 10 or less are
        dropped, and the ``cell_type1`` column is used as the label.

        Args:
            path: Location of the RDS file, passed to R's ``readRDS``.

        Returns:
            The populated ``GeneExpressionDataset``, with the R feature
            symbols attached as ``gene_symbols``.
        """
        print("Reading rds")
        # NOTE(review): ``path`` is interpolated into R source unescaped; a
        # path containing a single quote would break this call.
        ro.r("sce<-readRDS('%s')" % path)
        print("Extracting log counts")
        log_counts = ro.r("logcounts(sce)")
        print("Transforming log count to counts")
        # exp(x * ln 2) - 1 == 2**x - 1. The builtin ``int`` replaces the
        # ``np.int`` alias, which was removed from NumPy; the resulting dtype
        # is the same.
        counts = (np.exp(log_counts * np.log(2)) - 1).T.astype(int)
        gene_symbols = ro.r("rowData(sce)$feature_symbol")
        labels = ro.r("colData(sce)$cell_type1")
        labels_levels = ro.r("levels(colData(sce)$cell_type1)")
        if labels_levels is not rpy2.rinterface.NULL:
            # R levels are 1-based; translate each code into its level name.
            labels = np.array(
                [labels_levels[int(code) - 1] for code in labels])

        # Re-encode the labels as positions in the sorted unique values.
        cell_types = list(np.unique(labels))
        labels = np.array([cell_types.index(label) for label in labels])

        valid_idx = (counts.sum(axis=1) >
                     10).ravel()  # Filter bad quality cells
        counts = counts[valid_idx]
        labels = labels[valid_idx]
        gene_expression_dataset = GeneExpressionDataset(
            *GeneExpressionDataset.get_attributes_from_matrix(counts,
                                                              labels=labels),
            cell_types=cell_types)
        gene_expression_dataset.gene_symbols = gene_symbols
        return gene_expression_dataset
示例#8
0
    def test_special_dataset_size(self):
        """Train three trainer types for one epoch on a 34-cell dataset.

        The dataset has 17 * 2 cells, is split with ``train_size=0.5`` and
        loaded with ``batch_size=8``. Presumably these sizes are chosen so
        that a minibatch ends up holding a single cell -- TODO confirm.
        The test passes when no trainer raises.
        """

        def trainer_options(**extra):
            # Fresh keyword arguments per trainer so nothing is shared.
            options = {
                "train_size": 0.5,
                "use_cuda": False,
                "data_loader_kwargs": {"batch_size": 8},
            }
            options.update(extra)
            return options

        gene_dataset = GeneExpressionDataset()
        gene_counts = np.random.randint(1, 100, (17 * 2, 10))
        protein_counts = np.random.randint(1, 100, (17 * 2, 10))
        gene_dataset.populate_from_data(gene_counts)
        protein_measurement = CellMeasurement(
            name="protein_expression",
            data=protein_counts,
            columns_attr_name="protein_names",
            columns=np.arange(10),
        )
        gene_dataset.initialize_cell_measurement(protein_measurement)

        # UnsupervisedTrainer
        vae = VAE(
            gene_dataset.nb_genes,
            n_batch=gene_dataset.n_batches,
            n_labels=gene_dataset.n_labels,
        )
        unsupervised = UnsupervisedTrainer(vae, gene_dataset, **trainer_options())
        unsupervised.train(n_epochs=1)

        # JVAETrainer, fed the same dataset twice
        n_genes_per_dataset = [gene_dataset.nb_genes, gene_dataset.nb_genes]
        jvae = JVAE(
            n_genes_per_dataset,
            gene_dataset.nb_genes,
            [slice(None)] * 2,
            ["zinb", "zinb"],
            [True, True],
            n_batch=1,
        )
        discriminator = Classifier(gene_dataset.nb_genes, n_labels=2, logits=True)
        joint = JVAETrainer(
            jvae,
            discriminator,
            [gene_dataset, gene_dataset],
            **trainer_options()
        )
        joint.train(n_epochs=1)

        # TotalTrainer, with early stopping disabled
        totalvae = TOTALVI(gene_dataset.nb_genes, len(gene_dataset.protein_names))
        total = TotalTrainer(
            totalvae,
            gene_dataset,
            **trainer_options(early_stopping_kwargs=None)
        )
        total.train(n_epochs=1)
示例#9
0
    def test_genes_to_idx(self):
        """Upper-cased gene names should map back to positions 0..9."""
        counts = np.random.randint(1, 5, size=(5, 10))
        lower_names = np.array(["gene_%d" % idx for idx in range(10)])

        dataset = GeneExpressionDataset()
        dataset.populate_from_data(counts, gene_names=lower_names)

        # The dataset is queried with upper-case names although it was
        # populated with lower-case ones.
        queried = ["GENE_%d" % idx for idx in range(10)]
        positions = dataset.genes_to_index(queried)
        self.assertListEqual(list(range(10)), positions.tolist())
示例#10
0
 def test_filter_cells(self):
     """Cells with all-zero counts should be removed by the count filter."""
     counts = 100 * np.ones((25, 10))
     # Two cells (rows 4 and 5) have no counts at all.
     counts[4:6] = 0

     dataset = GeneExpressionDataset()
     dataset.populate_from_data(counts)
     self.assertEqual(25, dataset.nb_cells)

     dataset.filter_cells_by_count()
     self.assertEqual(23, dataset.nb_cells)
示例#11
0
    def test_filter_genes(self):
        """Filtering by gene name should keep exactly the requested genes."""
        counts = np.random.randint(1, 5, size=(5, 10))
        all_names = np.array(["gene_%d" % idx for idx in range(10)])

        dataset = GeneExpressionDataset()
        dataset.populate_from_data(counts, gene_names=all_names)

        # Requested in upper case; the stored names are expected to match.
        kept = ["GENE_1", "GENE_3"]
        dataset.filter_genes_by_attribute(kept)
        self.assertListEqual(kept, dataset.gene_names.tolist())
示例#12
0
    def test_compute_library_size_batch(self):
        """Check ``local_means`` / ``local_vars`` for identical cells.

        Each of the 7 cells has 10 genes valued exp(10) / 10, so every cell
        sums to exp(10). The test expects a local mean of 10.0 and a local
        variance of 0.0 for every cell.
        """
        per_gene_value = np.exp(10) / 10
        counts = per_gene_value * np.ones((7, 10), dtype=int)

        dataset = GeneExpressionDataset()
        dataset.populate_from_data(counts)

        expected_means = [[10.0]] * 7
        expected_vars = [[0.0]] * 7
        self.assertEqual(expected_means, dataset.local_means.tolist())
        self.assertEqual(expected_vars, dataset.local_vars.tolist())
示例#13
0
    def test_remap_categorical_attributes(self):
        """Labels 1 and 2 are expected to be stored as 0 and 1."""
        counts = np.random.randint(1, 5, size=(7, 11))
        raw_labels = [1, 1, 1, 1, 1, 2, 2]

        dataset = GeneExpressionDataset()
        dataset.populate_from_data(counts, labels=raw_labels)

        # Labels are stored as a column, hence one single-item list per cell.
        expected = [[0]] * 5 + [[1]] * 2
        self.assertListEqual(expected, dataset.labels.tolist())
示例#14
0
    def test_filter_cell_types(self):
        """Keeping cell type "0" should leave only the first two cells."""
        counts = np.random.randint(1, 5, size=(5, 10))

        dataset = GeneExpressionDataset()
        dataset.populate_from_data(
            counts,
            labels=[0, 0, 1, 1, 1],
            cell_types=["0", "1"],
        )
        dataset.filter_cell_types(["0"])

        # Only the first two cells carry label 0.
        expected_rows = counts[:2].tolist()
        self.assertListEqual(expected_rows, dataset.X.tolist())
示例#15
0
    def test_collate_normal(self):
        """The default collate function should return the selected cells."""

        def cell_column():
            # Fresh (25, 1) column holding the cell index 0..24.
            return np.arange(0, 25).reshape((-1, 1))

        # Both genes of cell i hold the value i, and cell i is in batch i.
        counts = np.ones((25, 2)) * cell_column()
        dataset = GeneExpressionDataset()
        dataset.populate_from_data(counts, batch_indices=cell_column())

        collate_fn = dataset.collate_fn_builder()
        # Exactly five outputs are expected; x and batch are checked.
        x, _mean, _var, batch, _labels = collate_fn([1, 2])
        self.assertListEqual(x.tolist(), [[1.0, 1.0], [2.0, 2.0]])
        self.assertListEqual(batch.tolist(), [[1], [2]])
示例#16
0
    def test_populate_from_datasets_dummy_data(self):
        """Merge random datasets and check genes, labels and batch indices.

        Three things are checked in turn: the merged gene set, label handling
        with and without ``shared_labels``, and the offsetting of batch
        indices together with their mapped "experiment" names.
        """

        def make_dataset(n_cells, n_genes):
            # Random counts with genes named gene_0 .. gene_<n_genes - 1>.
            counts = np.random.randint(1, 5, size=(n_cells, n_genes))
            names = np.array(["gene_%d" % idx for idx in range(n_genes)])
            built = GeneExpressionDataset()
            built.populate_from_data(counts, gene_names=names)
            return built

        dataset1 = make_dataset(5, 10)
        dataset2 = make_dataset(7, 3)
        dataset3 = make_dataset(2, 5)

        # Gene merging: only gene_0..gene_2 are present in all three inputs.
        merged = GeneExpressionDataset()
        merged.populate_from_datasets([dataset1, dataset2, dataset3])
        self.assertEqual(14, merged.nb_cells)
        self.assertEqual(3, merged.nb_genes)
        self.assertListEqual(
            ["GENE_0", "GENE_1", "GENE_2"], merged.gene_names.tolist()
        )

        # Label sharing: both inputs know cell type "0".
        dataset2.labels = [0, 0, 0, 1, 1, 1, 1]
        dataset2.initialize_mapped_attribute("labels", "cell_types", ["0", "1"])
        dataset3.labels = [0, 1]
        dataset3.initialize_mapped_attribute("labels", "cell_types", ["0", "2"])

        shared = GeneExpressionDataset()
        shared.populate_from_datasets([dataset2, dataset3], shared_labels=True)
        self.assertListEqual(
            np.squeeze(shared.labels).tolist(), [0, 0, 0, 1, 1, 1, 1, 0, 2]
        )
        self.assertListEqual(shared.cell_types, ["0", "1", "2"])

        unshared = GeneExpressionDataset()
        unshared.populate_from_datasets([dataset2, dataset3], shared_labels=False)
        self.assertListEqual(
            np.squeeze(unshared.labels).tolist(), [0, 0, 0, 1, 1, 1, 1, 2, 3]
        )
        self.assertListEqual(unshared.cell_types, ["0", "1", "0", "2"])

        # Batch offsetting: batches of the second input follow the first's.
        dataset2.batch_indices = [0, 0, 0, 1, 1, 1, 1]
        dataset2.initialize_mapped_attribute(
            "batch_indices", "experiment", ["fish_2", "scrna_2"]
        )
        dataset3.batch_indices = [0, 1]
        dataset3.initialize_mapped_attribute(
            "batch_indices", "experiment", ["fish_3", "scrna_3"]
        )

        offset = GeneExpressionDataset()
        offset.populate_from_datasets([dataset2, dataset3])
        self.assertListEqual(
            np.squeeze(offset.batch_indices).tolist(),
            [0, 0, 0, 1, 1, 1, 1, 2, 3],
        )
        self.assertListEqual(
            offset.experiment, ["fish_2", "scrna_2", "fish_3", "scrna_3"]
        )
示例#17
0
    def test_populate_from_datasets_gene_attributes_merging(self):
        """A gene attribute present in both inputs is taken from the first."""
        counts = np.random.randint(1, 5, size=(5, 10))
        genes = np.array(["gene_%d" % idx for idx in range(10)])
        # The same attribute name, "test", holds "1" in one dataset and "2"
        # in the other.
        attr_first = np.array([["1"]] * 10)
        attr_second = np.array([["2"]] * 10)

        first = GeneExpressionDataset()
        second = GeneExpressionDataset()
        first.populate_from_data(
            counts,
            gene_names=genes,
            gene_attributes_dict={"test": attr_first},
        )
        second.populate_from_data(
            counts,
            gene_names=genes,
            gene_attributes_dict={"test": attr_second},
        )

        merged = GeneExpressionDataset()
        merged.populate_from_datasets([first, second])

        # The value from the first dataset is the one expected to survive.
        self.assertEqual(merged.test[0, 0], "1")
示例#18
0
    def __init__(self, data, name, n_latent=10, reconstruction_seq='zinb'):
        """Set up the model and a merged FISH + sequencing dataset.

        ``data``, ``name`` and ``n_latent`` are forwarded to the parent
        constructor. ``self.full_dataset`` is built by merging deep copies of
        ``data.data_fish_partial`` and ``data.data_seq``, after which its
        library sizes are computed. ``reconstruction_seq`` is stored as
        given.
        """
        super().__init__(data, name, n_latent)

        self.full_dataset = GeneExpressionDataset()

        # Merge copies so the datasets held by ``data`` are left untouched.
        fish_copy = copy.deepcopy(data.data_fish_partial)
        seq_copy = copy.deepcopy(data.data_seq)
        self.full_dataset.populate_from_datasets([fish_copy, seq_copy])
        self.full_dataset.compute_library_size_batch()

        self.reconstruction_seq = reconstruction_seq
示例#19
0
    def test_populate_from_data(self):
        """Populating from a bare matrix should set sizes and defaults."""
        counts = 100 * np.ones((25, 10))
        dataset = GeneExpressionDataset()
        dataset.populate_from_data(counts)

        self.assertEqual(dataset.nb_genes, 10)
        self.assertEqual(dataset.nb_cells, 25)

        # Without explicit values, every cell is expected to be in batch 0
        # and to carry label 0.
        all_zero_column = [[0]] * 25
        self.assertListEqual(all_zero_column, dataset.batch_indices.tolist())
        self.assertListEqual(all_zero_column, dataset.labels.tolist())
示例#20
0
    def test_populate_from_datasets_cell_attributes_merging(self):
        """A cell attribute present in both inputs should be concatenated."""
        counts = np.random.randint(1, 5, size=(5, 10))
        genes = np.array(["gene_%d" % idx for idx in range(10)])
        # The same attribute name, "test", holds "1" for the cells of one
        # dataset and "2" for the cells of the other.
        attr_first = np.array([["1"]] * 5)
        attr_second = np.array([["2"]] * 5)

        first = GeneExpressionDataset()
        second = GeneExpressionDataset()
        first.populate_from_data(
            counts,
            gene_names=genes,
            cell_attributes_dict={"test": attr_first},
        )
        second.populate_from_data(
            counts,
            gene_names=genes,
            cell_attributes_dict={"test": attr_second},
        )

        merged = GeneExpressionDataset()
        merged.populate_from_datasets([first, second])

        # Five cells from each input, in input order.
        self.assertTupleEqual(merged.test.shape, (10, 1))
        self.assertListEqual(
            np.squeeze(merged.test).tolist(), ["1"] * 5 + ["2"] * 5
        )
示例#21
0
    def test_dense_subsample_genes(self):
        """Gene subsampling should reduce the gene count on dense data.

        Only a strict decrease is asserted: the number of genes kept can
        differ slightly from the requested ``n_top``.
        """
        batches = [np.random.randint(1, 5, size=(50, 26)) for _ in range(3)]

        def fresh_dataset():
            # Every mode starts from an untouched dataset built on the same
            # three dense batches.
            built = GeneExpressionDataset()
            built.populate_from_per_batch_list(batches)
            return built

        # Default mode; the target is half of the initial gene count.
        default_mode = fresh_dataset()
        n_genes = default_mode.nb_genes
        n_top = n_genes // 2
        default_mode.subsample_genes(new_n_genes=n_top)
        assert default_mode.nb_genes < n_genes

        # Seurat mode
        seurat_mode = fresh_dataset()
        seurat_mode.subsample_genes(new_n_genes=n_top, mode="seurat")
        assert seurat_mode.nb_genes < n_genes
示例#22
0
    def test_map_cell_types(self):
        """Mapping groups of cell types to new names, then remapping.

        Types "0" and "2" are mapped to "6", types "3" and "4" to "7". After
        remapping, the cell types are expected to be "5", "6", "7".
        """
        counts = np.random.randint(1, 5, size=(7, 10))
        initial_labels = [0, 0, 4, 4, 2, 3, 5]
        initial_types = ["0", "1", "2", "3", "4", "5"]

        dataset = GeneExpressionDataset()
        dataset.populate_from_data(
            counts, labels=initial_labels, cell_types=initial_types
        )

        type_mapping = {("0", "2"): "6", ("3", "4"): "7"}
        dataset.map_cell_types(type_mapping)
        dataset.remap_categorical_attributes()

        self.assertListEqual(dataset.cell_types.tolist(), ["5", "6", "7"])
        remapped_labels = np.squeeze(dataset.labels).tolist()
        self.assertListEqual(remapped_labels, [1, 1, 2, 2, 1, 2, 0])
示例#23
0
文件: test_scvi.py 项目: tkisss/scVI
def test_multibatches_features():
    """Train a VAE on four batches and impute with batch transformation.

    The test passes when training and both imputation calls complete without
    raising; no values are asserted.
    """
    # (exclusive upper bound of the counts, number of cells) for each batch.
    batch_specs = [(5, 20), (10, 20), (10, 20), (10, 30)]
    batches = [
        np.random.randint(1, high, size=(n_cells, 10))
        for high, n_cells in batch_specs
    ]

    dataset = GeneExpressionDataset()
    dataset.populate_from_per_batch_list(batches)

    model = VAE(dataset.nb_genes, dataset.n_batches)
    trainer = UnsupervisedTrainer(
        model, dataset, train_size=0.5, use_cuda=use_cuda
    )
    trainer.train(n_epochs=2)

    # ``transform_batch`` is exercised with a single batch and with a list.
    trainer.test_set.imputation(n_samples=2, transform_batch=0)
    trainer.train_set.imputation(n_samples=2, transform_batch=[0, 1, 2])
示例#24
0
    def test_reorder_genes(self):
        """Requested genes move to the front; unknown genes raise KeyError."""
        counts = 100 * np.ones((25, 100))
        names = np.array(["gene_%d" % idx for idx in range(100)])

        dataset = GeneExpressionDataset()
        dataset.populate_from_data(counts, gene_names=names)

        requested_first = ["GENE_47", "GENE_2", "GENE_3", "GENE_12"]
        dataset.reorder_genes(requested_first)

        # Expected order: the requested genes, then the rest in their
        # original order (0, 1, ...).
        leading_names = list(dataset.gene_names[0:6])
        self.assertListEqual(
            leading_names,
            ["GENE_47", "GENE_2", "GENE_3", "GENE_12", "GENE_0", "GENE_1"],
        )

        # Only gene_0 .. gene_99 exist.
        with self.assertRaises(KeyError):
            dataset.reorder_genes(["GENE_101"])
示例#25
0
    def test_merge_cell_types(self):
        """Merging two cell types yields a new label, compacted by remapping.

        Right after the merge, cells of types "0" and "1" are expected to
        carry label 3. After remapping, the labels are expected to be 0 for
        type "2" and 1 for the merged type.
        """
        counts = np.random.randint(1, 5, size=(8, 20))
        initial_labels = [0, 0, 1, 2, 2, 1, 0, 1]

        dataset = GeneExpressionDataset()
        dataset.populate_from_data(
            counts, labels=initial_labels, cell_types=["0", "1", "2"]
        )

        dataset.merge_cell_types(["0", "1"], new_cell_type_name="0 and 1")
        labels_after_merge = [[3], [3], [3], [2], [2], [3], [3], [3]]
        self.assertListEqual(labels_after_merge, dataset.labels.tolist())

        dataset.remap_categorical_attributes()
        labels_after_remap = [[1], [1], [1], [0], [0], [1], [1], [1]]
        self.assertListEqual(labels_after_remap, dataset.labels.tolist())
        self.assertListEqual(["2", "0 and 1"], dataset.cell_types.tolist())
示例#26
0
    def test_subsample_cells(self):
        """Exercise ``subsample_cells`` with default, float and int sizes.

        Cell i (1-based) holds the value i in all 7 genes. With ``size=0.8``
        the expected result is the cells valued 5, 4, 3, 2, in that order.
        """
        gene_row = np.ones((1, 7))
        counts = np.arange(1, 6).reshape(-1, 1) * gene_row

        dataset = GeneExpressionDataset()
        dataset.populate_from_data(counts)

        # Default size: all 5 cells are expected to remain.
        dataset.subsample_cells()
        self.assertEqual(5, dataset.nb_cells)

        # Fractional size
        dataset.subsample_cells(size=0.8)
        expected_counts = np.arange(5, 1, -1).reshape(-1, 1) * gene_row
        self.assertListEqual(expected_counts.tolist(), dataset.X.tolist())

        # Absolute size
        dataset.subsample_cells(size=2)
        self.assertEqual(2, dataset.nb_cells)
示例#27
0
 def test_populate_from_per_label_list(self):
     """One matrix per label should yield labels 0, 1, 2 in list order."""
     cells_per_label = (7, 5, 3)
     per_label_counts = [
         np.random.randint(1, 5, size=(n_cells, 10))
         for n_cells in cells_per_label
     ]

     dataset = GeneExpressionDataset()
     dataset.populate_from_per_label_list(per_label_counts)
     self.assertEqual(dataset.nb_cells, 15)
     self.assertEqual(dataset.nb_genes, 10)

     # Labels are stored as a column: one single-item list per cell, with
     # the label equal to the position of the cell's matrix in the list.
     expected_labels = [
         [label]
         for label, n_cells in enumerate(cells_per_label)
         for _ in range(n_cells)
     ]
     self.assertListEqual(dataset.labels.tolist(), expected_labels)
示例#28
0
 def test_data_loader(self):
     """Round-trip a dataset through AnnData and compare the contents.

     The "dev" measurement, the count matrix and the cell types read back
     from the AnnData object are expected to equal the originals.
     """
     counts = 100 * np.ones((25, 10))
     dev_values = np.ones((25, 4)) * np.arange(0, 4)
     measurement = CellMeasurement(
         name="dev",
         data=dev_values,
         columns_attr_name="dev_names",
         columns=["gabou", "achille", "pedro", "oclivio"],
     )

     source = GeneExpressionDataset()
     source.populate_from_data(counts, Ys=[measurement])

     restored = AnnDatasetFromAnnData(
         source.to_anndata(),
         cell_measurements_col_mappings={"dev": "dev_names"},
     )

     dev_matches = dev_values == restored.dev
     self.assertTrue(dev_matches.all())
     counts_match = source.X == restored.X
     self.assertTrue(counts_match.all())
     cell_types_match = source.cell_types == restored.cell_types
     self.assertTrue(cell_types_match.all())
示例#29
0
def training_score_scvi(train, **kwargs):
    """Fit an scVI VAE on ``train`` and score it under a Poisson model.

    A ``VAE`` with one input per column of ``train`` is trained for 100
    epochs. The imputed values of the train and test splits are stacked and
    used as Poisson means. Extra keyword arguments are accepted and ignored.

    Returns:
        The sum over all entries of the Poisson log-pmf of ``train`` given
        the stacked imputed means.
    """
    from scvi.dataset import GeneExpressionDataset
    from scvi.inference import UnsupervisedTrainer
    from scvi.models import VAE

    attributes = GeneExpressionDataset.get_attributes_from_matrix(train)
    dataset = GeneExpressionDataset(*attributes)
    model = VAE(n_input=train.shape[1])
    trainer = UnsupervisedTrainer(model, dataset, verbose=False)
    trainer.train(n_epochs=100)

    # Training shuffles the data for minibatching, so the sequential
    # (unshuffled) view of each split is used to estimate lambda.
    # NOTE(review): this assumes the train-split rows followed by the
    # test-split rows line up with the row order of ``train`` -- confirm.
    train_rates = trainer.train_set.sequential().imputation()
    test_rates = trainer.test_set.sequential().imputation()
    rates = np.vstack([train_rates, test_rates])

    log_pmf = st.poisson(mu=rates).logpmf(train)
    return log_pmf.sum()
示例#30
0
 def test_populate_from_datasets_cortex(self):
     """Merge two differently filtered Cortex datasets.

     The first copy keeps genes 0-2 and the second keeps genes 1-3, so the
     merged dataset is expected to hold the 2 genes they have in common.
     """

     def load_cortex(gene_indices):
         # Fresh Cortex dataset restricted to the given gene indices.
         loaded = CortexDataset(save_path="tests/data")
         loaded.subsample_genes(subset_genes=gene_indices, mode="variance")
         return loaded

     first = load_cortex(np.arange(0, 3))
     first.filter_cell_types(["microglia", "oligodendrocytes"])

     second = load_cortex(np.arange(1, 4))
     second.filter_cell_types(
         ["endothelial-mural", "interneurons", "microglia", "oligodendrocytes"]
     )
     # The second filter selects cell types by index rather than by name.
     second.filter_cell_types([2, 0])

     merged = GeneExpressionDataset()
     merged.populate_from_datasets([first, second])
     self.assertEqual(2, merged.nb_genes)