def test_knnDREMI():
    """Check `scprep.stats.knnDREMI` output, plotting and argument validation.

    Covers the scalar result on seeded data, the `plot=True` /
    `return_drevi=True` path, the same computation routed through
    `matrix.test_all_matrix_types`, the result for a constant second
    argument, and the messages for non-integer `k`, `n_bins` and `n_mesh`.
    """
    import tempfile

    X = data.generate_positive_sparse_matrix(shape=(500, 2), seed=42, poisson_mean=5)
    Y = scprep.stats.knnDREMI(X[:, 0], X[:, 1])
    assert isinstance(Y, float)
    np.testing.assert_allclose(Y, 0.16238906)

    # Save the plot inside a fresh temporary directory: a stale "test.png" in
    # the working directory can then never satisfy the existence check, and
    # the file is cleaned up even if the call or the assertion fails.
    with tempfile.TemporaryDirectory() as plot_dir:
        plot_path = os.path.join(plot_dir, "test.png")
        Y2, drevi = scprep.stats.knnDREMI(
            X[:, 0], X[:, 1], plot=True, filename=plot_path, return_drevi=True
        )
        assert os.path.isfile(plot_path)
    assert Y2 == Y
    assert drevi.shape == (20, 20)

    matrix.test_all_matrix_types(
        X,
        utils.assert_transform_equals,
        Y=Y,
        transform=partial(_test_fun_2d, fun=scprep.stats.knnDREMI),
        check=utils.assert_all_close,
    )

    # The constant-array warning is asserted separately at the end of this
    # test; here only the return value is checked, so UserWarning is silenced.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=UserWarning)
        assert scprep.stats.knnDREMI(
            X[:, 0], np.repeat(X[0, 1], X.shape[0]), return_drevi=True
        ) == (0, None)

    utils.assert_raises_message(
        ValueError,
        "Expected k as an integer. Got ",
        scprep.stats.knnDREMI,
        X[:, 0],
        X[:, 1],
        k="invalid",
    )
    utils.assert_raises_message(
        ValueError,
        "Expected n_bins as an integer. Got ",
        scprep.stats.knnDREMI,
        X[:, 0],
        X[:, 1],
        n_bins="invalid",
    )
    utils.assert_raises_message(
        ValueError,
        "Expected n_mesh as an integer. Got ",
        scprep.stats.knnDREMI,
        X[:, 0],
        X[:, 1],
        n_mesh="invalid",
    )
    utils.assert_warns_message(
        UserWarning,
        "Attempting to calculate kNN-DREMI on a constant array. " "Returning `0`",
        scprep.stats.knnDREMI,
        X[:, 0],
        np.zeros_like(X[:, 1]),
    )
def test_select_cols_no_condition(self):
    """Expect a UserWarning when `select_cols` gets no selection condition."""
    expected_message = "No selection conditions provided. Returning all columns."
    utils.assert_warns_message(
        UserWarning, expected_message, scprep.select.select_cols, self.X
    )
def test_get_cell_set_no_condition(self):
    """Expect a UserWarning when `get_cell_set` gets no selection condition."""
    expected_message = "No selection conditions provided. Returning all cells."
    utils.assert_warns_message(
        UserWarning, expected_message, scprep.select.get_cell_set, self.X
    )
def test_gene_expression_filter_warning(self):
    """Check the warning and errors of `filter_gene_set_expression`.

    Exercises, in order: the warning for a `percentile` below 1, the error
    when both `cutoff` and `percentile` are given, the error for an invalid
    `keep_cells`, the error when neither `cutoff` nor `percentile` is given,
    and the `KeyError` for a gene name that is not present.
    """
    genes = np.arange(10)
    filter_fun = scprep.filter.filter_gene_set_expression

    # NOTE: the two adjacent literals are joined without a separating space;
    # the expected message is kept exactly as written.
    utils.assert_warns_message(
        UserWarning,
        "`percentile` expects values between 0 and 100."
        "Got 0.9. Did you mean 90.0?",
        filter_fun,
        self.X_sparse,
        genes=genes,
        percentile=0.90,
        keep_cells="below",
    )
    utils.assert_raises_message(
        ValueError,
        "Only one of `cutoff` and `percentile` should be given.",
        filter_fun,
        self.X_sparse,
        genes=genes,
        percentile=0.90,
        cutoff=50,
    )
    utils.assert_raises_message(
        ValueError,
        "Expected `keep_cells` in ['above', 'below', 'between']. " "Got neither",
        filter_fun,
        self.X_sparse,
        genes=genes,
        percentile=90.0,
        keep_cells="neither",
    )
    utils.assert_raises_message(
        ValueError,
        "One of either `cutoff` or `percentile` must be given.",
        filter_fun,
        self.X_sparse,
        genes=genes,
        cutoff=None,
        percentile=None,
    )
    utils.assert_raises_message(
        KeyError,
        "not_a_gene",
        filter_fun,
        self.X_sparse,
        genes="not_a_gene",
        percentile=90.0,
        keep_cells="below",
    )
def test_select_rows_zero_rows(self):
    """Expect a UserWarning when `select_rows` is given a `row sum < 0` mask."""
    negative_sum_rows = self.X.sum(axis=1) < 0
    utils.assert_warns_message(
        UserWarning,
        "Selecting 0 rows",
        scprep.select.select_rows,
        self.X,
        idx=negative_sum_rows,
    )
def test_select_cols_zero_columns(self):
    """Expect a UserWarning when `select_cols` is given a `column sum < 0` mask."""
    negative_sum_cols = self.X.sum(axis=0) < 0
    utils.assert_warns_message(
        UserWarning,
        "Selecting 0 columns",
        scprep.select.select_cols,
        self.X,
        idx=negative_sum_cols,
    )
def test_fun(X):
    """Expect a RuntimeWarning from `scprep.transform.log` with pseudocount=5."""
    log_kwargs = {"data": X, "base": 2, "pseudocount": 5}
    utils.assert_warns_message(
        RuntimeWarning,
        "log transform on sparse data requires pseudocount = 1",
        scprep.transform.log,
        **log_kwargs,
    )
def test_libsize_norm_median_zero(self):
    """Expect a UserWarning from median rescaling when most rows are zeroed."""
    X = self.X.copy()
    # Zero one more than half of the leading rows -- presumably so that the
    # median library size becomes zero, as the expected message states.
    n_zeroed = X.shape[0] // 2 + 1
    X[:n_zeroed] = 0
    expected_message = "Median library size is zero. " "Rescaling to mean instead."
    utils.assert_warns_message(
        UserWarning,
        expected_message,
        scprep.normalize.library_size_normalize,
        X,
        rescale="median",
    )
def test_differential_expression_error():
    """Check the errors and warning of the differential expression functions.

    Uses the 10X test data to trigger invalid `direction` and `measure`
    values, a non-matrix second argument, a `gene_names` length mismatch for
    both `differential_expression` and `differential_expression_by_cluster`,
    and the warning for inconsistent column names.
    """
    X = data.load_10X()
    diff_exp = scprep.stats.differential_expression

    utils.assert_raises_message(
        ValueError,
        "Expected `direction` in ['up', 'down', 'both']. " "Got invalid",
        diff_exp,
        X,
        X,
        direction="invalid",
    )
    utils.assert_raises_message(
        ValueError,
        "Expected `measure` in ['difference', 'emd', 'ttest', 'ranksum']. "
        "Got invalid",
        diff_exp,
        X,
        X,
        measure="invalid",
    )

    # A single row stands in for a non-matrix input.
    first_row = X.iloc[0]
    shape_message = (
        "Expected `X` and `Y` to be matrices. "
        "Got shapes {}, {}".format(X.shape, first_row.shape)
    )
    utils.assert_raises_message(ValueError, shape_message, diff_exp, X, first_row)

    n_rows = X.shape[0]
    n_names = n_rows // 2
    gene_names_message = (
        "Expected gene_names to have length {}. " "Got {}".format(n_rows, n_names)
    )
    utils.assert_raises_message(
        ValueError,
        gene_names_message,
        diff_exp,
        X.sparse.to_coo(),
        X.sparse.to_coo(),
        gene_names=np.arange(n_names),
    )
    utils.assert_raises_message(
        ValueError,
        gene_names_message,
        scprep.stats.differential_expression_by_cluster,
        X.sparse.to_coo(),
        np.random.choice(2, n_rows, replace=True),
        gene_names=np.arange(n_names),
    )

    utils.assert_warns_message(
        UserWarning,
        "Input data has inconsistent column names. "
        "Subsetting to 20 common columns.",
        diff_exp,
        X,
        X.iloc[:, :20],
    )
def test_splatter_deprecated(self):
    """Expect a FutureWarning when `SplatSimulate` is given `path_length`."""
    simulate_kwargs = dict(batch_cells=10, n_genes=200, verbose=0, path_length=100)
    utils.assert_warns_message(
        FutureWarning,
        "path_length has been renamed path_n_steps, "
        "please use path_n_steps in the future.",
        scprep.run.SplatSimulate,
        **simulate_kwargs,
    )
def test_deprecated_remove(self):
    """Expect a DeprecationWarning from each `scprep.filter.remove_*` function."""
    # (expected message, deprecated function) pairs, checked in this order.
    cases = (
        (
            "`scprep.filter.remove_empty_genes` is deprecated. Use "
            "`scprep.filter.filter_empty_genes` instead.",
            scprep.filter.remove_empty_genes,
        ),
        (
            "`scprep.filter.remove_rare_genes` is deprecated. Use "
            "`scprep.filter.filter_rare_genes` instead.",
            scprep.filter.remove_rare_genes,
        ),
        (
            "`scprep.filter.remove_empty_cells` is deprecated. Use "
            "`scprep.filter.filter_empty_cells` instead.",
            scprep.filter.remove_empty_cells,
        ),
        (
            "`scprep.filter.remove_duplicates` is deprecated. Use "
            "`scprep.filter.filter_duplicates` instead.",
            scprep.filter.remove_duplicates,
        ),
    )
    for expected_message, deprecated_fun in cases:
        utils.assert_warns_message(
            DeprecationWarning, expected_message, deprecated_fun, self.X_dense
        )
def test_deprecated_sample_labels(self):
    """Expect DeprecationWarnings for `sample_labels` and `filter_per_sample`."""
    sample_labels = np.arange(self.X_dense.shape[0])
    labels_message = (
        "`sample_labels` is deprecated. " "Passing `sample_labels` as `extra_data`."
    )
    per_sample_message = (
        "`filter_per_sample` is deprecated. " "Filtering as a single sample."
    )
    # (expected message, filter function, keyword arguments), checked in order.
    cases = (
        (
            labels_message,
            scprep.filter.filter_empty_cells,
            {"sample_labels": sample_labels},
        ),
        (
            labels_message,
            scprep.filter.filter_duplicates,
            {"sample_labels": sample_labels},
        ),
        (
            labels_message,
            scprep.filter.filter_library_size,
            {"cutoff": 10, "sample_labels": sample_labels},
        ),
        (
            per_sample_message,
            scprep.filter.filter_library_size,
            {"cutoff": 10, "filter_per_sample": True},
        ),
    )
    for expected_message, filter_fun, filter_kwargs in cases:
        utils.assert_warns_message(
            DeprecationWarning,
            expected_message,
            filter_fun,
            self.X_dense,
            **filter_kwargs,
        )
def test_combine_batches_errors():
    """Check the warning and errors raised by `scprep.utils.combine_batches`.

    Covers `append_to_cell_names` with sparse-matrix input, mixed input
    classes, mismatched column counts, mismatched `batch_labels` length and
    unsupported input types.
    """
    X = data.load_10X()
    combine = scprep.utils.combine_batches
    half_rows = X.shape[0] // 2
    half_cols = X.shape[1] // 2

    coo_batches = [X.sparse.to_coo(), X.iloc[:half_rows].sparse.to_coo()]
    utils.assert_warns_message(
        UserWarning,
        "append_to_cell_names only valid for pd.DataFrame input. " "Got coo_matrix",
        combine,
        coo_batches,
        batch_labels=[0, 1],
        append_to_cell_names=True,
    )

    mixed_batches = [X, X.iloc[:half_rows].sparse.to_coo()]
    utils.assert_raises_message(
        TypeError,
        "Expected data all of the same class. Got DataFrame, coo_matrix",
        combine,
        mixed_batches,
        batch_labels=[0, 1],
    )

    columns_message = (
        "Expected data all with the same number of columns. "
        "Got {}, {}".format(X.shape[1], half_cols)
    )
    uneven_column_batches = [
        scprep.utils.toarray(X),
        scprep.select.select_cols(scprep.utils.toarray(X), idx=np.arange(half_cols)),
    ]
    utils.assert_raises_message(
        ValueError,
        columns_message,
        combine,
        uneven_column_batches,
        batch_labels=[0, 1],
    )

    two_batches = [X, scprep.select.select_rows(X, idx=np.arange(half_rows))]
    utils.assert_raises_message(
        ValueError,
        "Expected data (2) and batch_labels (1) to be the same length.",
        combine,
        two_batches,
        batch_labels=[0],
    )

    utils.assert_raises_message(
        ValueError,
        "Expected data to contain pandas DataFrames, "
        "scipy sparse matrices or numpy arrays. Got str",
        combine,
        ["hello", "world"],
        batch_labels=[0, 1],
    )
def test_slingshot_errors(self):
    """Check the dimensionality warning and label-length error of `Slingshot`."""
    four_dims = self.X_pca[:, :4]
    utils.assert_warns_message(
        UserWarning,
        "Expected data to be low-dimensional. " "Got data.shape[1] = 4",
        scprep.run.Slingshot,
        four_dims,
        self.clusters,
        verbose=False,
    )

    # Pass only half as many cluster labels as there are rows of data.
    n_cells = self.X.shape[0]
    n_labels = n_cells // 2
    length_message = (
        "Expected len(cluster_labels) ({}) to equal "
        "data.shape[0] ({})".format(n_labels, n_cells)
    )
    utils.assert_raises_message(
        ValueError,
        length_message,
        scprep.run.Slingshot,
        self.X_pca[:, :2],
        self.clusters[:n_labels],
        verbose=False,
    )
def test_check_index_multiindex():
    """Check `scprep.sanitize.check_index` on a two-level MultiIndex.

    For a row repeated two and three times, verifies the rename warning, that
    the returned (and in-place modified) frame has unique entries, and that
    `copy=True` leaves the duplicated input untouched.
    """
    X = data.load_10X()
    n_cells = X.shape[0]
    X["i"] = list(range(n_cells))
    X["i+1"] = list(range(1, n_cells + 1))
    X = X.set_index(["i", "i+1"])
    scprep.sanitize.check_index(X)

    # (row positions to duplicate, expected warning, index keys after renaming)
    cases = (
        (
            [0, 0],
            "Renamed 2 copies of index (0, 1) to ((0, 1), (0, '1.1'))",
            [(0, 1), (0, "1.1")],
        ),
        (
            [1, 1, 1],
            "Renamed 3 copies of index (1, 2) to ((1, 2), (1, '2.1'), (1, '2.2'))",
            [(1, 2), (1, "2.1"), (1, "2.2")],
        ),
    )
    for rows, expected_message, renamed_keys in cases:
        with utils.assert_warns_message(RuntimeWarning, expected_message):
            scprep.sanitize.check_index(X.iloc[rows])

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=RuntimeWarning)

            # Returned frame: every renamed key selects a single row.
            Y = scprep.sanitize.check_index(X.iloc[rows])
            for key in renamed_keys:
                assert isinstance(Y.loc[key], pd.Series)

            # Default call modifies the frame it was given.
            Y = X.iloc[rows]
            scprep.sanitize.check_index(Y)
            for key in renamed_keys:
                assert isinstance(Y.loc[key], pd.Series)

            # With copy=True the input keeps its duplicated entries.
            Y = X.iloc[rows]
            scprep.sanitize.check_index(Y, copy=True)
            original_key = renamed_keys[0]
            assert isinstance(Y.loc[original_key], pd.DataFrame)
            assert Y.loc[original_key].shape[0] == len(rows)
def test_10X_duplicate_gene_names():
    """Expect RuntimeWarnings when loading 10X data with duplicate gene names."""
    duplicate_dir = os.path.join(data.data_dir, "test_10X_duplicate_gene_names")

    symbol_message = (
        "Duplicate gene names detected! Forcing `gene_labels='both'`. "
        "Alternatively, try `gene_labels='id'`, `allow_duplicates=True`, or "
        "load the matrix with `sparse=False`"
    )
    utils.assert_warns_message(
        RuntimeWarning,
        symbol_message,
        scprep.io.load_10X,
        duplicate_dir,
        gene_labels="symbol",
        sparse=True,
    )

    dense_message = "Duplicate gene names detected! Forcing dense matrix"
    utils.assert_warns_message(
        RuntimeWarning,
        dense_message,
        scprep.io.load_10X,
        duplicate_dir,
        allow_duplicates=True,
        sparse=True,
    )
def test_check_index():
    """Check `scprep.sanitize.check_index` on duplicated cell barcodes.

    For a row repeated two and three times, verifies the rename warning, that
    the returned (and in-place modified) frame has unique entries, and that
    `copy=True` leaves the duplicated input untouched.
    """
    X = data.load_10X()
    scprep.sanitize.check_index(X)

    # (row positions to duplicate, expected warning, index labels after renaming)
    cases = (
        (
            [0, 0],
            "Renamed 2 copies of index GATGAGGCATTTCAGG-1 to "
            "(GATGAGGCATTTCAGG-1, GATGAGGCATTTCAGG-1.1)",
            ["GATGAGGCATTTCAGG-1", "GATGAGGCATTTCAGG-1.1"],
        ),
        (
            [1, 1, 1],
            "Renamed 3 copies of index GTCATTTCATCTCGCT-1 to "
            "(GTCATTTCATCTCGCT-1, GTCATTTCATCTCGCT-1.1, GTCATTTCATCTCGCT-1.2)",
            ["GTCATTTCATCTCGCT-1", "GTCATTTCATCTCGCT-1.1", "GTCATTTCATCTCGCT-1.2"],
        ),
    )
    for rows, expected_message, renamed_labels in cases:
        with utils.assert_warns_message(RuntimeWarning, expected_message):
            scprep.sanitize.check_index(X.iloc[rows])

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=RuntimeWarning)

            # Returned frame: every renamed label selects a single row.
            Y = scprep.sanitize.check_index(X.iloc[rows])
            for label in renamed_labels:
                assert isinstance(Y.loc[label], pd.Series)

            # Default call modifies the frame it was given.
            Y = X.iloc[rows]
            scprep.sanitize.check_index(Y)
            for label in renamed_labels:
                assert isinstance(Y.loc[label], pd.Series)

            # With copy=True the input keeps its duplicated entries.
            Y = X.iloc[rows]
            scprep.sanitize.check_index(Y, copy=True)
            original_label = renamed_labels[0]
            assert isinstance(Y.loc[original_label], pd.DataFrame)
            assert Y.loc[original_label].shape[0] == len(rows)
def test_combine_batches_uncommon_genes(sparse):
    """Expect column-mismatch warnings from `scprep.utils.combine_batches`.

    Combines the 10X data with a copy restricted to its first half of the
    columns, once with default settings and once with
    `common_columns_only=False`.
    """
    X = data.load_10X(sparse=sparse)
    n_kept = X.shape[1] // 2
    Y = X.iloc[:, :n_kept]

    subset_message = (
        "Input data has inconsistent column names. "
        "Subsetting to {} common columns.".format(Y.shape[1])
    )
    utils.assert_warns_message(
        UserWarning,
        subset_message,
        scprep.utils.combine_batches,
        [X, Y],
        ["x", "y"],
    )

    padding_message = (
        "Input data has inconsistent column names. "
        "Padding with zeros to {} total columns.".format(X.shape[1])
    )
    utils.assert_warns_message(
        UserWarning,
        padding_message,
        scprep.utils.combine_batches,
        [X, Y],
        ["x", "y"],
        common_columns_only=False,
    )
def test_deprecated():
    """Check the deprecated `*_transform` aliases in `scprep.transform`.

    Each alias must give output equivalent to its replacement and emit a
    FutureWarning with the expected message.
    """
    X = data.generate_positive_sparse_matrix()

    # (replacement, deprecated alias, expected warning), checked in this order.
    cases = (
        (
            scprep.transform.sqrt,
            scprep.transform.sqrt_transform,
            "scprep.transform.sqrt_transform is deprecated. Please use "
            "scprep.transform.sqrt in future.",
        ),
        (
            scprep.transform.log,
            scprep.transform.log_transform,
            "scprep.transform.log_transform is deprecated. Please use "
            "scprep.transform.log in future.",
        ),
        (
            scprep.transform.arcsinh,
            scprep.transform.arcsinh_transform,
            "scprep.transform.arcsinh_transform is deprecated. Please use "
            "scprep.transform.arcsinh in future.",
        ),
    )
    for replacement, deprecated_alias, expected_message in cases:
        Y = replacement(X)
        # Silence the deprecation warning while comparing outputs; the warning
        # itself is asserted right after.
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=FutureWarning)
            utils.assert_transform_equivalent(X, Y=Y, transform=deprecated_alias)
        utils.assert_warns_message(
            FutureWarning, expected_message, deprecated_alias, data=X
        )
def test_deprecated(self):
    """Expect FutureWarnings for deprecated `scprep.reduce.pca` keywords."""
    # (expected message, keyword arguments passed to pca), checked in order.
    cases = (
        ("n_pca is deprecated. Setting n_components=2", {"n_pca": 2}),
        (
            "svd_offset is deprecated. Please use `eps` instead.",
            {"n_components": 2, "svd_offset": 100},
        ),
        (
            "svd_multiples is deprecated. Please use `eps` instead.",
            {"n_components": 2, "svd_multiples": 100},
        ),
    )
    for expected_message, pca_kwargs in cases:
        utils.assert_warns_message(
            FutureWarning, expected_message, scprep.reduce.pca, self.X, **pca_kwargs
        )
def test_csv_and_tsv():
    """Check `scprep.io.load_csv` and `load_tsv` against the 10X test data.

    Loads `test_small.csv` with several combinations of name, axis and
    sparsity options (plus `test_small.tsv`), compares values and labels with
    `data.load_10X()`, checks the handling of duplicate gene names, and
    verifies the error for an unrecognised `cell_axis`.
    """
    X = data.load_10X()
    filename = os.path.join(data.data_dir, "test_small.csv")
    load_csv = scprep.io.load_csv

    def count_mismatches(left, right):
        # Applying np.sum twice collapses the elementwise comparison to a count.
        return np.sum(np.sum(left != right))

    def assert_dense_match(loaded):
        # Same values and labels as X, returned as a non-sparse DataFrame.
        assert count_mismatches(X, loaded) == 0
        np.testing.assert_array_equal(X.columns, loaded.columns)
        np.testing.assert_array_equal(X.index, loaded.index)
        assert isinstance(loaded, pd.DataFrame)
        assert not scprep.utils.is_sparse_dataframe(loaded)

    X_csv = load_csv(filename, gene_names=True, cell_names=True)
    with utils.assert_warns_message(
        RuntimeWarning,
        "Duplicate cell names detected! Some functions may not work as intended. "
        "You can fix this by running `scprep.sanitize.check_index(data)`.",
    ):
        # The label 0 appears twice in the supplied cell names.
        load_csv(
            filename,
            gene_names=True,
            cell_names=[0] + list(range(X_csv.shape[1] - 1)),
        )
    X_csv2 = load_csv(filename, gene_names=True, cell_names=None, index_col=0)
    X_csv3 = load_csv(filename, gene_names=None, cell_names=True, header=0)
    X_csv4 = load_csv(filename, gene_names=True, cell_names=True, cell_axis="col")
    X_tsv = scprep.io.load_tsv(os.path.join(data.data_dir, "test_small.tsv"))

    value_pairs = (
        (X, X_csv),
        (X_csv, X_csv2),
        (X_csv, X_csv3),
        (X_csv, X_csv4.T),
        (X_csv, X_tsv),
    )
    for expected, loaded in value_pairs:
        assert count_mismatches(expected, loaded) == 0

    # X_csv4 was loaded with cell_axis="col", so its labels are swapped.
    label_pairs = (
        (X.columns, X_csv.columns),
        (X.index, X_csv.index),
        (X_csv.columns, X_csv2.columns),
        (X_csv.index, X_csv2.index),
        (X_csv.columns, X_csv3.columns),
        (X_csv.index, X_csv3.index),
        (X_csv.columns, X_csv4.index),
        (X_csv.index, X_csv4.columns),
    )
    for expected_labels, loaded_labels in label_pairs:
        np.testing.assert_array_equal(expected_labels, loaded_labels)
    assert isinstance(X_csv, pd.DataFrame)
    assert not scprep.utils.is_sparse_dataframe(X_csv)

    # Skip the header row and the label column; names are supplied separately.
    data_columns = range(1, 101)

    X_named_from_files = load_csv(
        filename,
        gene_names=os.path.join(data.data_dir, "gene_symbols.csv"),
        cell_names=os.path.join(data.data_dir, "barcodes.tsv"),
        skiprows=1,
        usecols=data_columns,
    )
    assert_dense_match(X_named_from_files)

    X_named_from_arrays = load_csv(
        filename,
        gene_names=X.columns,
        cell_names=X.index,
        skiprows=1,
        usecols=data_columns,
    )
    assert_dense_match(X_named_from_arrays)

    X_sparse = load_csv(
        filename,
        gene_names=None,
        cell_names=None,
        sparse=True,
        skiprows=1,
        usecols=data_columns,
    )
    assert count_mismatches(X.to_numpy(), X_sparse.to_numpy()) == 0
    assert scprep.utils.is_sparse_dataframe(X_sparse)

    X_duplicates = load_csv(
        os.path.join(data.data_dir, "test_small_duplicate_gene_names.csv")
    )
    for column_name in ("DUPLICATE", "DUPLICATE.1"):
        assert column_name in X_duplicates.columns

    utils.assert_raises_message(
        ValueError,
        "cell_axis neither not recognized. " "Expected 'row' or 'column'",
        load_csv,
        filename,
        cell_axis="neither",
    )
def test_check_index_ndarray():
    """Expect a UserWarning when `check_index` is given a NumPy array."""
    not_pandas = np.array([0, 1])
    expected_message = "scprep.sanitize.check_index only accepts pandas input"
    with utils.assert_warns_message(UserWarning, expected_message):
        scprep.sanitize.check_index(not_pandas)