示例#1
0
 def setUpClass(self):
     """Load the 10X test data and cache several representations of it.

     Stores the dense and sparse results of ``data.load_10X``, a numpy
     array and a COO matrix derived from them, and the dense frame's index
     and columns as cell and gene names.
     """
     dense = data.load_10X(sparse=False)
     self.X_dense = dense
     sparse_frame = data.load_10X(sparse=True)
     self.X_sparse = sparse_frame
     # derived representations of the two loads above
     self.X_numpy = dense.to_numpy()
     self.X_coo = sparse_frame.sparse.to_coo()
     # rows are treated as cells, columns as genes
     self.cell_names = dense.index
     self.gene_names = dense.columns
示例#2
0
def test_10X():
    """Check ``data.load_10X`` for each ``gene_labels`` mode and its errors."""
    expected_shape = (100, 100)

    # default arguments: sparse frame
    X = data.load_10X()
    assert X.shape == expected_shape
    assert isinstance(X, pd.SparseDataFrame)
    assert X.columns[0] == "Arl8b"

    # gene ids, dense output
    X = data.load_10X(gene_labels='id', sparse=False)
    assert X.shape == expected_shape
    assert isinstance(X, pd.DataFrame)
    assert not isinstance(X, pd.SparseDataFrame)
    assert X.columns[0] == "ENSMUSG00000030105"

    # combined labels
    X = data.load_10X(gene_labels='both')
    assert X.shape == expected_shape
    assert isinstance(X, pd.SparseDataFrame)
    assert X.columns[0] == "Arl8b (ENSMUSG00000030105)"

    # unknown gene_labels value
    assert_raise_message(
        ValueError,
        "gene_labels='invalid' not recognized. "
        "Choose from ['symbol', 'id', 'both']",
        data.load_10X,
        gene_labels='invalid')

    # a zip file is not accepted where a directory is expected
    zip_path = os.path.join(data.data_dir, "test_10X.zip")
    assert_raise_message(
        FileNotFoundError,
        "{} is not a directory".format(zip_path),
        scprep.io.load_10X,
        zip_path)

    # a directory without the expected files
    missing_files_message = (
        "'matrix.mtx', 'genes.tsv', and 'barcodes.tsv' must be present "
        "in {}".format(data.data_dir))
    assert_raise_message(
        FileNotFoundError,
        missing_files_message,
        scprep.io.load_10X,
        data.data_dir)
示例#3
0
def test_gene_expression_filter_sample_label():
    """Check that filtered data and returned sample labels have equal length."""
    X = data.load_10X(sparse=False)
    gene_idx = np.arange(10)
    # one label per row, indexed like the data
    labels = pd.DataFrame(np.arange(X.shape[0]), index=X.index)
    filtered = scprep.filter.filter_gene_set_expression(
        X, gene_idx, percentile=90, sample_labels=labels)
    X_filtered, labels = filtered
    assert X_filtered.shape[0] == len(labels)
示例#4
0
def test_combine_batches_uncommon_genes():
    """Check the warning raised when batches do not share all columns."""
    X = data.load_10X()
    # second batch keeps only the first half of the columns
    n_kept = X.shape[1] // 2
    Y = X.iloc[:, :n_kept]
    expected_warning = (
        "Input data has inconsistent column names. "
        "Subsetting to {} common columns.".format(Y.shape[1]))
    assert_warns_message(
        UserWarning,
        expected_warning,
        scprep.utils.combine_batches,
        [X, Y],
        ['x', 'y'])
示例#5
0
def test_combine_batches():
    """Check ``scprep.utils.combine_batches`` against a manual ``pd.concat``.

    The full data and its first half of rows are combined with batch labels
    0 and 1. The result is compared with the concatenated frame, the
    returned sample labels are checked, the ``append_to_cell_names`` suffix
    is verified, and the comparison is repeated through
    ``matrix.test_matrix_types`` for ``matrix._indexable_matrix_types``.
    """

    def first_half(mat):
        # first half of the rows of `mat`
        return scprep.utils.select_rows(mat, np.arange(mat.shape[0] // 2))

    def transform(X):
        # only the combined data; the labels are checked separately below
        return scprep.utils.combine_batches(
            [X, first_half(X)], batch_labels=[0, 1])[0]

    X = data.load_10X()
    Y = pd.concat([X, first_half(X)])
    Y2, sample_labels = scprep.utils.combine_batches(
        [X, first_half(X)], batch_labels=[0, 1])
    assert utils.assert_matrix_class_equivalent(Y, Y2)
    utils.assert_all_equal(Y, Y2)
    assert np.all(Y.index == Y2.index)
    assert np.all(sample_labels == np.concatenate(
        [np.repeat(0, X.shape[0]),
         np.repeat(1, X.shape[0] // 2)]))

    Y2, sample_labels = scprep.utils.combine_batches(
        [X, first_half(X)],
        batch_labels=[0, 1],
        append_to_cell_names=True)
    # the last two characters of each new name are compared with
    # "_<label>"; the rest must equal the original name
    assert np.all(Y.index == np.array([i[:-2] for i in Y2.index]))
    # np.char.add is the public spelling of np.core.defchararray.add
    assert np.all(
        np.char.add("_", sample_labels.astype(str)) == np.array(
            [i[-2:] for i in Y2.index], dtype=str))

    matrix.test_matrix_types(X,
                             utils.assert_transform_equals,
                             matrix._indexable_matrix_types,
                             Y=Y,
                             transform=transform,
                             check=utils.assert_all_equal)
示例#6
0
def test_combine_batches_errors():
    """Check the warning and errors of ``scprep.utils.combine_batches``."""
    X = data.load_10X()
    combine = scprep.utils.combine_batches

    # append_to_cell_names with coo_matrix input: expects a UserWarning
    coo_batches = [X.to_coo(), X.iloc[:X.shape[0] // 2].to_coo()]
    assert_warns_message(
        UserWarning,
        "append_to_cell_names only valid for pd.DataFrame input. "
        "Got coo_matrix",
        combine,
        coo_batches,
        batch_labels=[0, 1],
        append_to_cell_names=True)

    # batches of different classes
    mixed_batches = [X, X.iloc[:X.shape[0] // 2].to_coo()]
    assert_raise_message(
        TypeError,
        "Expected data all of the same class. Got SparseDataFrame, coo_matrix",
        combine,
        mixed_batches,
        batch_labels=[0, 1])

    # batches with different numbers of columns
    column_message = (
        "Expected data all with the same number of columns. "
        "Got {}, {}".format(X.shape[1], X.shape[1] // 2))
    fewer_columns = scprep.utils.select_cols(X, np.arange(X.shape[1] // 2))
    assert_raise_message(
        ValueError,
        column_message,
        combine,
        [X, fewer_columns],
        batch_labels=[0, 1])

    # two batches but a single batch label
    fewer_rows = scprep.utils.select_rows(X, np.arange(X.shape[0] // 2))
    assert_raise_message(
        ValueError,
        "Expected data (2) and batch_labels (1) to be the same length.",
        combine,
        [X, fewer_rows],
        batch_labels=[0])

    # unsupported input type
    assert_raise_message(
        ValueError,
        "Expected data to contain pandas DataFrames, "
        "scipy sparse matrices or numpy arrays. Got str",
        combine,
        ["hello", "world"],
        batch_labels=[0, 1])
示例#7
0
def test_10X_HDF5():
    """Compare ``scprep.io.load_10X_HDF5`` with ``data.load_10X``.

    Loads the HDF5 file with the default call and with the h5py backend,
    then checks the errors raised for an invalid genome, backend and
    ``gene_labels`` value.
    """
    X = data.load_10X()
    h5_file = os.path.join(data.data_dir, "test_10X.h5")

    # tables backend (default call) first, then the h5py backend
    for backend_kwargs in ({}, {"backend": 'h5py'}):
        X_hdf5 = scprep.io.load_10X_HDF5(h5_file, **backend_kwargs)
        assert isinstance(X_hdf5, pd.SparseDataFrame)
        assert np.sum(np.sum(X != X_hdf5)) == 0
        np.testing.assert_array_equal(X.columns, X_hdf5.columns)
        np.testing.assert_array_equal(X.index, X_hdf5.index)

    genome_message = ("Genome invalid not found in {}. "
                      "Available genomes: GRCh38".format(h5_file))
    assert_raise_message(
        ValueError,
        genome_message,
        scprep.io.load_10X_HDF5,
        filename=h5_file,
        genome="invalid")
    assert_raise_message(
        ValueError,
        "Expected backend in ['tables', 'h5py']. Got invalid",
        scprep.io.load_10X_HDF5,
        filename=h5_file,
        backend="invalid")
    assert_raise_message(
        ValueError,
        "gene_labels='invalid' not recognized. "
        "Choose from ['symbol', 'id', 'both']",
        scprep.io.load_10X_HDF5,
        filename=h5_file,
        gene_labels='invalid')
示例#8
0
def test_deprecated():
    """Check the FutureWarning of each deprecated ``scprep.utils`` function."""
    X = data.load_10X()
    # (expected message, deprecated callable, positional args, keyword args)
    cases = [
        ("`scprep.utils.select_cols` is deprecated. Use "
         "`scprep.select.select_cols` instead.",
         scprep.utils.select_cols, (X, [1, 2, 3]), {}),
        ("`scprep.utils.select_rows` is deprecated. Use "
         "`scprep.select.select_rows` instead.",
         scprep.utils.select_rows, (X, [1, 2, 3]), {}),
        ("`scprep.utils.get_gene_set` is deprecated. Use "
         "`scprep.select.get_gene_set` instead.",
         scprep.utils.get_gene_set, (X,), {"starts_with": "D"}),
        ("`scprep.utils.get_cell_set` is deprecated. Use "
         "`scprep.select.get_cell_set` instead.",
         scprep.utils.get_cell_set, (X,), {"starts_with": "A"}),
        ("`scprep.utils.subsample` is deprecated. Use "
         "`scprep.select.subsample` instead.",
         scprep.utils.subsample, (X,), {"n": 10}),
    ]
    for message, deprecated_fun, args, kwargs in cases:
        assert_warns_message(
            FutureWarning, message, deprecated_fun, *args, **kwargs)
示例#9
0
def test_10X_HDF5_cellranger3():
    """Compare the cellranger3 HDF5 file with ``data.load_10X`` per backend.

    Each backend is requested explicitly, then selected automatically while
    the other backend's module is masked in ``sys.modules``.
    """
    X = data.load_10X()
    h5_file = os.path.join(data.data_dir, "test_10X_cellranger3.h5")

    def assert_matches_reference(X_hdf5):
        # same class of output, same values, same labels as the reference
        assert scprep.utils.is_sparse_dataframe(X_hdf5)
        assert np.sum(np.sum(X != X_hdf5)) == 0
        np.testing.assert_array_equal(X.columns, X_hdf5.columns)
        np.testing.assert_array_equal(X.index, X_hdf5.index)

    # explicit tables backend, then explicit h5py backend
    for backend in ("tables", "h5py"):
        assert_matches_reference(
            scprep.io.load_10X_HDF5(h5_file, backend=backend))

    # automatic backend: a None entry in sys.modules makes importing that
    # module fail, so masking h5py exercises tables and vice versa
    for masked_module in ("h5py", "tables"):
        with mock.patch.dict(sys.modules, {masked_module: None}):
            assert_matches_reference(scprep.io.load_10X_HDF5(h5_file))
示例#10
0
def test_mean_difference():
    """Check ``scprep.stats.mean_difference`` values, matrix types and errors."""
    X = scprep.filter.filter_empty_genes(data.load_10X())
    # rows 0-19 against rows 20-99
    Y = scprep.stats.mean_difference(X.iloc[:20], X.iloc[20:100])
    assert np.allclose(np.max(Y), 16.8125)
    assert np.allclose(np.min(Y), -0.5625)

    def split_and_compare(X, **kwargs):
        # same row split as above, expressed through select_rows
        first_group = scprep.select.select_rows(X, idx=np.arange(20))
        second_group = scprep.select.select_rows(X, idx=np.arange(20, 100))
        return scprep.stats.mean_difference(
            first_group, second_group, **kwargs)

    matrix.test_all_matrix_types(
        X,
        utils.assert_transform_equals,
        Y=Y,
        transform=split_and_compare,
        check=utils.assert_all_close,
    )

    # mismatched number of columns
    X_narrow = X.iloc[:, :10]
    shape_message = (
        "Expected X and Y to have the same number of columns. "
        "Got shapes {}, {}".format(X.shape, X_narrow.shape))
    utils.assert_raises_message(
        ValueError,
        shape_message,
        scprep.stats.mean_difference,
        X,
        X_narrow,
    )
示例#11
0
def test_check_numeric_inplace():
    """Check ``scprep.sanitize.check_numeric`` with ``copy=False``.

    The transform is run through ``matrix.test_matrix_types`` with
    ``utils.assert_transform_unchanged`` for the listed matrix types. When
    ``matrix._pandas_0`` is set, the deprecated sparse frame is expected to
    raise a TypeError. Finally an object whose ``astype`` does not accept
    ``copy`` is expected to surface the resulting TypeError.
    """
    X = data.load_10X()
    matrix.test_matrix_types(
        X,
        utils.assert_transform_unchanged,
        matrix._scipy_matrix_types + matrix._numpy_matrix_types +
        matrix._pandas_dense_matrix_types + [matrix.SparseDataFrame],
        transform=scprep.sanitize.check_numeric,
        copy=False,
    )
    if matrix._pandas_0:
        matrix._ignore_pandas_sparse_warning()
        try:
            utils.assert_raises_message(
                TypeError,
                "pd.SparseDataFrame does not support "
                "copy=False. Please use copy=True.",
                scprep.sanitize.check_numeric,
                data=matrix.SparseDataFrame_deprecated(X),
                copy=False,
            )
        finally:
            # always pair the suppression above with its reset, even when
            # the assertion fails, so later tests are not affected
            matrix._reset_warnings()

    class TypeErrorClass(object):
        # astype deliberately has no `copy` parameter
        def astype(self, dtype):
            return

    X = TypeErrorClass()
    utils.assert_raises_message(
        TypeError,
        "astype() got an unexpected keyword argument 'copy'",
        scprep.sanitize.check_numeric,
        data=X,
        copy=None,
    )
示例#12
0
def test_10X_HDF5_cellranger3():
    """Compare the cellranger3 HDF5 file with ``data.load_10X`` per backend.

    The file is loaded with the default call, with each backend requested
    explicitly, and once more with ``scprep.io.hdf5.tables`` removed. The
    removed attribute is always restored, even when a check fails.
    """
    X = data.load_10X()
    h5_file = os.path.join(data.data_dir, "test_10X_cellranger3.h5")

    def check_loaded(**kwargs):
        # load with the given options and compare with the reference
        X_hdf5 = scprep.io.load_10X_HDF5(h5_file, **kwargs)
        assert isinstance(X_hdf5, pd.SparseDataFrame)
        assert np.sum(np.sum(X != X_hdf5)) == 0
        np.testing.assert_array_equal(X.columns, X_hdf5.columns)
        np.testing.assert_array_equal(X.index, X_hdf5.index)

    # automatic tables backend
    check_loaded()
    # explicit tables backend
    check_loaded(backend='tables')
    # explicit h5py backend
    check_loaded(backend='h5py')
    # automatic h5py backend: remove `tables` from scprep.io.hdf5 for the
    # duration of the check
    tables = scprep.io.hdf5.tables
    del scprep.io.hdf5.tables
    try:
        check_loaded()
    finally:
        # restore on failure too, otherwise later tests see no `tables`
        scprep.io.hdf5.tables = tables
示例#13
0
def test_10X_HDF5():
    """Compare ``scprep.io.load_10X_HDF5`` with ``data.load_10X`` per backend.

    The file is loaded with the default call, with each backend requested
    explicitly, and once more with ``scprep.io.hdf5.tables`` removed. The
    removed attribute is always restored, even when a check fails.
    """
    X = data.load_10X()
    h5_file = os.path.join(data.data_dir, "test_10X.h5")

    def check_loaded(**kwargs):
        # load with the given options and compare with the reference
        X_hdf5 = scprep.io.load_10X_HDF5(h5_file, **kwargs)
        assert scprep.utils.is_sparse_dataframe(X_hdf5)
        assert np.sum(np.sum(X != X_hdf5)) == 0
        np.testing.assert_array_equal(X.columns, X_hdf5.columns)
        np.testing.assert_array_equal(X.index, X_hdf5.index)

    # automatic tables backend
    check_loaded()
    # explicit tables backend
    check_loaded(backend="tables")
    # explicit h5py backend
    check_loaded(backend="h5py")
    # automatic h5py backend: remove `tables` from scprep.io.hdf5 for the
    # duration of the check
    tables = scprep.io.hdf5.tables
    del scprep.io.hdf5.tables
    try:
        check_loaded()
    finally:
        # restore on failure too, otherwise later tests see no `tables`
        scprep.io.hdf5.tables = tables
示例#14
0
def test_check_numeric_copy():
    """Run ``check_numeric`` with ``copy=True`` via ``test_all_matrix_types``."""
    reference = data.load_10X()
    # keyword arguments forwarded to matrix.test_all_matrix_types
    options = dict(transform=scprep.sanitize.check_numeric, copy=True)
    matrix.test_all_matrix_types(
        reference, utils.assert_transform_unchanged, **options)
示例#15
0
def test_10X_zip_url():
    """Load the zipped 10X data from a URL and compare with ``load_10X``."""
    reference = data.load_10X()
    url = "https://github.com/KrishnaswamyLab/scprep/raw/master/data/test_data/test_10X.zip"
    loaded = scprep.io.load_10X_zip(url)
    assert scprep.utils.is_sparse_dataframe(loaded)
    # no differing entries, and identical labels on both axes
    n_different = np.sum(np.sum(reference != loaded))
    assert n_different == 0
    np.testing.assert_array_equal(reference.columns, loaded.columns)
    np.testing.assert_array_equal(reference.index, loaded.index)
示例#16
0
def test_remove_empty_cells_sample_label():
    """Check that ``remove_empty_cells`` filters sample labels with the data."""
    X = data.load_10X(sparse=False)
    all_labels = np.arange(X.shape[0])
    # labels of the rows whose sum is positive
    nonempty = X.sum(1) > 0
    expected_labels = all_labels[nonempty]
    X_filtered, kept_labels = scprep.filter.remove_empty_cells(
        X, sample_labels=all_labels)
    assert X_filtered.shape[0] == len(kept_labels)
    assert np.all(kept_labels == expected_labels)
示例#17
0
def test_10X_zip(filename):
    """Load a zipped 10X file from the data directory and compare it.

    Parameters
    ----------
    filename : str
        Name of the zip file, relative to ``data.data_dir``.
    """
    reference = data.load_10X()
    zip_path = os.path.join(data.data_dir, filename)
    loaded = scprep.io.load_10X_zip(zip_path)
    assert scprep.utils.is_sparse_dataframe(loaded)
    # no differing entries, and identical labels on both axes
    n_different = np.sum(np.sum(reference != loaded))
    assert n_different == 0
    np.testing.assert_array_equal(reference.columns, loaded.columns)
    np.testing.assert_array_equal(reference.index, loaded.index)
示例#18
0
def test_remove_empty_cells():
    """Check ``scprep.filter.remove_empty_cells`` on dense data and others."""
    X = data.load_10X(sparse=False)
    X_filtered = scprep.filter.remove_empty_cells(X)
    # columns are untouched and no remaining row sums to zero
    assert X_filtered.shape[1] == X.shape[1]
    row_totals = X_filtered.sum(1)
    assert not np.any(row_totals == 0)
    matrix.test_all_matrix_types(
        X,
        utils.assert_transform_equals,
        Y=X_filtered,
        transform=scprep.filter.remove_empty_cells)
示例#19
0
def test_download_url():
    """Download the test matrix from a URL and compare it with ``load_10X``.

    The downloaded file is removed afterwards, including when the download,
    the load or the comparison fails.
    """
    X = data.load_10X()
    local_file = "url_test.mtx.gz"
    try:
        scprep.io.download.download_url(
            "https://github.com/KrishnaswamyLab/scprep/raw/master/data/test_data/test_10X/matrix.mtx.gz",
            local_file,
        )
        # the loaded matrix is transposed before comparison with X
        Y = scprep.io.load_mtx(local_file).T
        assert (X.sparse.to_coo() - Y).nnz == 0
    finally:
        # the file may not exist if the download itself failed
        if os.path.isfile(local_file):
            os.remove(local_file)
示例#20
0
def test_library_size_filter_error():
    """Check the error raised for an invalid ``keep_cells`` value."""
    sparse_data = data.load_10X(sparse=True)
    expected_message = (
        "Expected `keep_cells` in ['above', 'below']. Got invalid")
    assert_raise_message(
        ValueError,
        expected_message,
        scprep.filter.filter_library_size,
        sparse_data,
        100,
        keep_cells='invalid')
示例#21
0
def test_remove_rare_genes():
    """Check ``scprep.filter.remove_rare_genes`` on dense data and others."""
    X = data.load_10X(sparse=False)
    X_filtered = scprep.filter.remove_rare_genes(X)
    # rows are untouched and no remaining column sums to less than 5
    assert X_filtered.shape[0] == X.shape[0]
    column_totals = X_filtered.sum(0)
    assert not np.any(column_totals < 5)
    matrix.test_all_matrix_types(
        X,
        utils.assert_transform_equals,
        Y=X_filtered,
        transform=scprep.filter.remove_rare_genes)
示例#22
0
def test_10X():
    """Check ``load_10X`` for each ``gene_labels`` mode and its errors.

    Also loads the ``test_10X_cellranger3`` directory and checks that its
    index and columns match the data loaded with ``gene_labels="both"``.
    """
    X = data.load_10X()
    assert X.shape == (100, 100)
    assert scprep.utils.is_sparse_dataframe(X)
    assert X.columns[0] == "Arl8b"
    X = data.load_10X(gene_labels="id", sparse=False)
    assert X.shape == (100, 100)
    assert isinstance(X, pd.DataFrame)
    assert not scprep.utils.is_sparse_dataframe(X)
    assert X.columns[0] == "ENSMUSG00000030105"
    X = data.load_10X(gene_labels="both")
    assert X.shape == (100, 100)
    assert scprep.utils.is_sparse_dataframe(X)
    assert X.columns[0] == "Arl8b (ENSMUSG00000030105)"
    X_cellranger3 = scprep.io.load_10X(os.path.join(data.data_dir,
                                                    "test_10X_cellranger3"),
                                       gene_labels="both")
    # the index check used to be repeated verbatim; once per axis suffices
    np.testing.assert_array_equal(X.index, X_cellranger3.index)
    np.testing.assert_array_equal(X.columns, X_cellranger3.columns)
    utils.assert_raises_message(
        ValueError,
        "gene_labels='invalid' not recognized. "
        "Choose from ['symbol', 'id', 'both']",
        data.load_10X,
        gene_labels="invalid",
    )
    # a zip file is not accepted where a directory is expected
    utils.assert_raises_message(
        FileNotFoundError,
        "{} is not a directory".format(
            os.path.join(data.data_dir, "test_10X.zip")),
        scprep.io.load_10X,
        os.path.join(data.data_dir, "test_10X.zip"),
    )
    # a directory without the expected files
    utils.assert_raises_message(
        FileNotFoundError,
        "'matrix.mtx(.gz)', '[genes/features].tsv(.gz)', and "
        "'barcodes.tsv(.gz)' must be present "
        "in {}".format(data.data_dir),
        scprep.io.load_10X,
        data.data_dir,
    )
示例#23
0
def test_gene_expression_filter_warning():
    """Check warnings and errors of ``filter_gene_set_expression``.

    Covers a fractional ``percentile``, giving both or neither of ``cutoff``
    and ``percentile``, an invalid ``keep_cells`` value and an unknown gene
    label. Also checks the warning from ``scprep.utils.select_cols`` when
    the selection is empty.
    """
    X = data.load_10X(sparse=True)
    genes = np.arange(10)
    no_genes = 'not_a_gene'
    # percentile given as a fraction instead of a value out of 100
    assert_warns_message(UserWarning,
                         "`percentile` expects values between 0 and 100."
                         "Got 0.9. Did you mean 90.0?",
                         scprep.filter.filter_gene_set_expression,
                         X,
                         genes,
                         percentile=0.90,
                         keep_cells='below')
    # both cutoff and percentile
    assert_raise_message(
        ValueError,
        "Only one of `cutoff` and `percentile` should be given.",
        scprep.filter.filter_gene_set_expression,
        X,
        genes,
        percentile=0.90,
        cutoff=50)
    # invalid keep_cells
    assert_raise_message(ValueError,
                         "Expected `keep_cells` in ['above', 'below']. "
                         "Got neither",
                         scprep.filter.filter_gene_set_expression,
                         X,
                         genes,
                         percentile=90.0,
                         keep_cells='neither')
    # neither cutoff nor percentile
    assert_raise_message(
        ValueError,
        "One of either `cutoff` or `percentile` must be given.",
        scprep.filter.filter_gene_set_expression,
        X,
        genes,
        cutoff=None,
        percentile=None)
    # gene label that is not a column
    assert_raise_message(KeyError,
                         "the label [not_a_gene] is not in the [columns]",
                         scprep.filter.filter_gene_set_expression,
                         X,
                         no_genes,
                         percentile=90.0,
                         keep_cells='below')
    # column sums below zero: the mask passed here selects nothing
    assert_warns_message(UserWarning, "Selecting 0 columns",
                         scprep.utils.select_cols, X, (X.sum(axis=0) < 0))
示例#24
0
def test_library_size_filter():
    """Check ``scprep.filter.filter_library_size`` with a cutoff of 100."""
    X = data.load_10X(sparse=True)
    X_filtered = scprep.filter.filter_library_size(X, 100)
    # columns are untouched and every remaining row sums to more than 100
    assert X_filtered.shape[1] == X.shape[1]
    row_totals = X_filtered.sum(1)
    assert not np.any(row_totals <= 100)
    filter_at_100 = partial(scprep.filter.filter_library_size, cutoff=100)
    matrix.test_all_matrix_types(
        X,
        utils.assert_transform_equals,
        Y=X_filtered,
        transform=filter_at_100)
示例#25
0
def test_differential_expression_error():
    """Check errors and warnings of the differential expression functions.

    Covers an invalid ``direction`` and ``measure``, a non-matrix input,
    ``gene_names`` of the wrong length for both
    ``differential_expression`` and ``differential_expression_by_cluster``,
    and the warning raised for inconsistent column names.
    """
    X = data.load_10X()
    # gene names label columns, so their expected length is the column
    # count; the original used X.shape[0], which only matched because the
    # test matrix is square
    # NOTE(review): confirm against scprep.stats.differential_expression
    n_genes = X.shape[1]
    utils.assert_raises_message(
        ValueError,
        "Expected `direction` in ['up', 'down', 'both']. "
        "Got invalid",
        scprep.stats.differential_expression,
        X,
        X,
        direction="invalid",
    )
    utils.assert_raises_message(
        ValueError,
        "Expected `measure` in ['difference', 'emd', 'ttest', 'ranksum']. "
        "Got invalid",
        scprep.stats.differential_expression,
        X,
        X,
        measure="invalid",
    )
    # second argument is a single row rather than a matrix
    utils.assert_raises_message(
        ValueError,
        "Expected `X` and `Y` to be matrices. "
        "Got shapes {}, {}".format(X.shape, X.iloc[0].shape),
        scprep.stats.differential_expression,
        X,
        X.iloc[0],
    )
    utils.assert_raises_message(
        ValueError,
        "Expected gene_names to have length {}. "
        "Got {}".format(n_genes, n_genes // 2),
        scprep.stats.differential_expression,
        X.sparse.to_coo(),
        X.sparse.to_coo(),
        gene_names=np.arange(n_genes // 2),
    )
    utils.assert_raises_message(
        ValueError,
        "Expected gene_names to have length {}. "
        "Got {}".format(n_genes, n_genes // 2),
        scprep.stats.differential_expression_by_cluster,
        X.sparse.to_coo(),
        # one cluster assignment per row
        np.random.choice(2, X.shape[0], replace=True),
        gene_names=np.arange(n_genes // 2),
    )
    utils.assert_warns_message(
        UserWarning,
        "Input data has inconsistent column names. "
        "Subsetting to 20 common columns.",
        scprep.stats.differential_expression,
        X,
        X.iloc[:, :20],
    )
示例#26
0
def test_download_zip():
    """Download and extract the zipped test data, then compare it.

    The extraction directory is removed afterwards, including when the
    download, the load or a comparison fails.
    """
    X = data.load_10X()
    destination = "zip_test"
    try:
        scprep.io.download.download_and_extract_zip(
            "https://github.com/KrishnaswamyLab/scprep/raw/master/data/test_data/test_10X.zip",
            destination,
        )
        Y = scprep.io.load_10X("zip_test/test_10X")
        assert np.all(X == Y)
        assert np.all(X.index == Y.index)
        assert np.all(X.columns == Y.columns)
    finally:
        # the directory may not exist if the download itself failed
        if os.path.isdir(destination):
            shutil.rmtree(destination)
示例#27
0
def test_combine_batches_rangeindex():
    """Check ``combine_batches`` on data whose index was reset.

    The combined index must be a ``pd.RangeIndex``, the columns must match
    up to ordering, and the rows of each batch must be preserved.
    """
    X = data.load_10X()
    X = X.reset_index(drop=True)
    Y = X.iloc[:X.shape[0] // 2]
    data_combined, labels = scprep.utils.combine_batches([X, Y], ["x", "y"])
    assert isinstance(data_combined.index, pd.RangeIndex)

    # compare in a fixed column order; the combined order is not assumed
    sorted_columns = np.sort(X.columns)
    assert np.all(np.sort(data_combined.columns) == sorted_columns)

    # the first batch occupies the first X.shape[0] rows (previously
    # hard-coded as 100), the second batch follows
    n_first = X.shape[0]
    first_rows = data_combined.iloc[:n_first][sorted_columns].to_numpy()
    assert np.all(first_rows == X[sorted_columns].to_numpy())
    second_rows = data_combined.iloc[n_first:][sorted_columns].to_numpy()
    assert np.all(second_rows == Y[sorted_columns].to_numpy())
示例#28
0
def test_unzip_destination():
    """Check ``scprep.io.download.unzip`` with an explicit destination.

    A copy of the test zip is extracted into ``zip_test``; the zip copy must
    no longer exist afterwards and the extracted data must equal the
    reference. Both the copy and the extraction directory are cleaned up
    even when a step fails.
    """
    X = data.load_10X()
    filename = os.path.join(data.data_dir, "test_10X.zip")
    tmp_filename = "zip_extract_test.zip"
    destination = "zip_test"
    # work on a copy so the original test zip is left in place
    shutil.copyfile(filename, tmp_filename)
    try:
        scprep.io.download.unzip(tmp_filename, destination=destination)
        assert not os.path.isfile(tmp_filename)
        Y = scprep.io.load_10X("zip_test/test_10X")
        assert np.all(X == Y)
        assert np.all(X.index == Y.index)
        assert np.all(X.columns == Y.columns)
    finally:
        # the copy only remains if unzip failed or did not delete it
        if os.path.isfile(tmp_filename):
            os.remove(tmp_filename)
        if os.path.isdir(destination):
            shutil.rmtree(destination)
示例#29
0
def test_plot_histogram():
    """Exercise the histogram plots and the error for an invalid ``ax``."""
    X = data.load_10X()
    scprep.plot.plot_library_size(X, cutoff=1000, log=True)

    # plot onto an explicitly created axis
    _, axis = plt.subplots()
    genes_d = scprep.utils.get_gene_set(X, starts_with="D")
    scprep.plot.plot_gene_set_expression(
        X, genes=genes_d, percentile=90, log='y', ax=axis)

    # anything that is not an Axes is rejected
    assert_raise_message(
        TypeError,
        "Expected ax as a matplotlib.axes.Axes. Got ",
        scprep.plot.plot_library_size,
        X,
        ax="invalid")
示例#30
0
def test_differential_expression_by_cluster():
    """Compare the by-cluster result with per-cluster one-vs-rest calls."""
    X = data.load_10X()
    # fixed seed so the random cluster assignment is reproducible
    np.random.seed(42)
    clusters = np.random.choice(4, X.shape[0], replace=True)
    de_options = dict(measure="difference", direction="up")
    result = scprep.stats.differential_expression_by_cluster(
        X, clusters, **de_options)
    for cluster in range(4):
        in_cluster = clusters == cluster
        # rows of this cluster against all remaining rows
        expected = scprep.stats.differential_expression(
            scprep.select.select_rows(X, idx=in_cluster),
            scprep.select.select_rows(X, idx=~in_cluster),
            **de_options
        )
        assert np.all(result[cluster] == expected)