def test_10X_zip_not_a_file():
    """Check that `load_10X_zip` raises `FileNotFoundError` for a missing path."""
    missing_zip = "not_a_file.zip"
    expected_message = "No such file: 'not_a_file.zip'"
    utils.assert_raises_message(
        FileNotFoundError,
        expected_message,
        scprep.io.load_10X_zip,
        missing_zip,
    )
def test_10X_zip_url_404():
    """Check that `load_10X_zip` raises `HTTPError` with a 404 message for a bad URL."""
    bad_url = "https://github.com/KrishnaswamyLab/scprep/invalid_url"
    expected_message = "HTTP Error 404: Not Found"
    utils.assert_raises_message(
        urllib.error.HTTPError,
        expected_message,
        scprep.io.load_10X_zip,
        bad_url,
    )
def test_10X_zip_url_not_a_zip():
    """Check that `load_10X_zip` raises `BadZipFile` for a URL that is not a zip."""
    non_zip_url = (
        "https://github.com/KrishnaswamyLab/scprep/raw/master/data/test_data/test_10X"
    )
    expected_message = "File is not a zip file"
    utils.assert_raises_message(
        zipfile.BadZipFile,
        expected_message,
        scprep.io.load_10X_zip,
        non_zip_url,
    )
def test_10X_HDF5_invalid_backend():
    """Check that `load_10X_HDF5` raises `ValueError` for an unknown `backend`."""
    hdf5_path = os.path.join(data.data_dir, "test_10X.h5")
    expected_message = "Expected backend in ['tables', 'h5py']. Got invalid"
    utils.assert_raises_message(
        ValueError,
        expected_message,
        scprep.io.load_10X_HDF5,
        filename=hdf5_path,
        backend="invalid",
    )
def test_10X_HDF5_invalid_gene_labels():
    """Check that `load_10X_HDF5` raises `ValueError` for an unknown `gene_labels`."""
    hdf5_path = os.path.join(data.data_dir, "test_10X.h5")
    expected_message = (
        "gene_labels='invalid' not recognized. "
        "Choose from ['symbol', 'id', 'both']"
    )
    utils.assert_raises_message(
        ValueError,
        expected_message,
        scprep.io.load_10X_HDF5,
        filename=hdf5_path,
        gene_labels="invalid",
    )
def test_10X_HDF5_invalid_genome():
    """Check that `load_10X_HDF5` raises `ValueError` for a genome not in the file."""
    hdf5_path = os.path.join(data.data_dir, "test_10X.h5")
    # The expected message embeds the path of the file that was searched.
    message_template = (
        "Genome invalid not found in {}. "
        "Available genomes: GRCh38"
    )
    expected_message = message_template.format(hdf5_path)
    utils.assert_raises_message(
        ValueError,
        expected_message,
        scprep.io.load_10X_HDF5,
        filename=hdf5_path,
        genome="invalid",
    )
def test_fcs_naming_error():
    """Check that `load_fcs` raises `ValueError` for an unknown `channel_naming`."""
    sample_path = fcsparser.test_sample_path
    expected_message = (
        "Expected channel_naming in ['$PnS', '$PnN']. "
        "Got 'invalid'"
    )
    utils.assert_raises_message(
        ValueError,
        expected_message,
        scprep.io.load_fcs,
        sample_path,
        override=True,
        channel_naming="invalid",
    )
def test_fcs_file_error():
    """Check that `load_fcs` raises `RuntimeError` when handed a CSV file."""
    csv_path = os.path.join(data.data_dir, "test_small.csv")
    # The expected message embeds the path that failed to load.
    message_template = (
        "fcsparser failed to load {}, likely due to"
        " a malformed header. You can try using "
        "`override=True` to use scprep's built-in "
        "experimental FCS parser."
    )
    expected_message = message_template.format(csv_path)
    utils.assert_raises_message(
        RuntimeError,
        expected_message,
        scprep.io.load_fcs,
        csv_path,
    )
def test_10X_HDF5_genome_cellranger3():
    """Check that passing `genome` with the Cellranger 3 file raises `NotImplementedError`."""
    hdf5_path = os.path.join(data.data_dir, "test_10X_cellranger3.h5")
    expected_message = (
        "Selecting genomes for Cellranger 3.0 files is not "
        "currently supported. Please file an issue at "
        "https://github.com/KrishnaswamyLab/scprep/issues"
    )
    utils.assert_raises_message(
        NotImplementedError,
        expected_message,
        scprep.io.load_10X_HDF5,
        filename=hdf5_path,
        genome="GRCh38",
    )
def test_parse_header():
    """Check that `_parse_header` raises `ValueError` on a length mismatch.

    Covers both an in-memory header (array) and a header given as a file path.
    """
    array_header = np.arange(10)
    file_header = os.path.join(data.data_dir, "gene_symbols.csv")

    # In-memory header: 10 entries supplied where 5 are expected.
    utils.assert_raises_message(
        ValueError,
        "Expected 5 entries in gene_names. Got 10",
        scprep.io.utils._parse_header,
        array_header,
        5,
    )

    # File header: the expected message reports the file's absolute path.
    file_message = "Expected 50 entries in {}. Got 100".format(
        os.path.abspath(file_header)
    )
    utils.assert_raises_message(
        ValueError,
        file_message,
        scprep.io.utils._parse_header,
        file_header,
        50,
    )
def test_10X_zip_error():
    """Check the `ValueError`s raised by `load_10X_zip`.

    Covers an unknown `gene_labels` value and the `test_10X_invalid.zip`
    archive.
    """
    valid_zip = os.path.join(data.data_dir, "test_10X.zip")
    invalid_zip = os.path.join(data.data_dir, "test_10X_invalid.zip")

    gene_labels_message = (
        "gene_labels='invalid' not recognized. "
        "Choose from ['symbol', 'id', 'both']"
    )
    utils.assert_raises_message(
        ValueError,
        gene_labels_message,
        scprep.io.load_10X_zip,
        valid_zip,
        gene_labels="invalid",
    )

    # Only the leading part of the message is pinned; it ends at "Got ".
    layout_message = (
        "Expected a single zipped folder containing 'matrix.mtx(.gz)', "
        "'[genes/features].tsv(.gz)', and 'barcodes.tsv(.gz)'. Got "
    )
    utils.assert_raises_message(
        ValueError,
        layout_message,
        scprep.io.load_10X_zip,
        invalid_zip,
    )
def test_mtx():
    """Exercise `scprep.io.load_mtx` against the reference 10X dataset.

    Loads the same matrix with names given as file paths, as in-memory
    labels, and with no names (dense output), then checks the error raised
    for an unknown `cell_axis` and a final load with generated names.
    """
    reference = data.load_10X()
    mtx_path = os.path.join(data.data_dir, "test_10X", "matrix.mtx.gz")

    # Names supplied as paths to files on disk.
    loaded = scprep.io.load_mtx(
        mtx_path,
        gene_names=os.path.join(data.data_dir, "gene_symbols.csv"),
        cell_names=os.path.join(data.data_dir, "barcodes.tsv"),
        cell_axis="column",
    )
    n_mismatched = np.sum(np.sum(reference.to_numpy() != loaded.to_numpy()))
    assert n_mismatched == 0
    np.testing.assert_array_equal(reference.columns, loaded.columns)
    np.testing.assert_array_equal(reference.index, loaded.index)
    assert scprep.utils.is_sparse_dataframe(loaded)

    # Names supplied directly from the reference frame's labels.
    loaded = scprep.io.load_mtx(
        mtx_path,
        gene_names=reference.columns,
        cell_names=reference.index,
        cell_axis="column",
    )
    n_mismatched = np.sum(np.sum(reference.to_numpy() != loaded.to_numpy()))
    assert n_mismatched == 0
    np.testing.assert_array_equal(reference.columns, loaded.columns)
    np.testing.assert_array_equal(reference.index, loaded.index)
    assert scprep.utils.is_sparse_dataframe(loaded)

    # No names and sparse=False: the result is a plain ndarray.
    loaded = scprep.io.load_mtx(
        mtx_path,
        gene_names=None,
        cell_names=None,
        sparse=False,
        cell_axis="column",
    )
    n_mismatched = np.sum(np.sum(reference.to_numpy() != loaded))
    assert n_mismatched == 0
    assert isinstance(loaded, np.ndarray)

    # Unknown cell_axis value is rejected.
    axis_message = (
        "cell_axis neither not recognized. "
        "Expected 'row' or 'column'"
    )
    utils.assert_raises_message(
        ValueError,
        axis_message,
        scprep.io.load_mtx,
        mtx_path,
        cell_axis="neither",
    )

    # Generated names: string gene names, integer cell names, no cell_axis given.
    loaded = scprep.io.load_mtx(
        mtx_path,
        gene_names=np.arange(reference.shape[1]).astype("str"),
        cell_names=np.arange(reference.shape[0]),
    )
    assert loaded.shape == (100, 100)
    assert scprep.utils.is_sparse_dataframe(loaded)
    assert loaded.columns[0] == "0"
    assert loaded.index[0] == 0
def test_fcs_header_error():
    """Exercise `scprep.io.fcs._parse_fcs_header` on modified FCS metadata.

    Starting from the metadata of fcsparser's sample file, checks that:

    * with the `__header__` data offsets zeroed and their original values
      copied into `$DATASTART` / `$DATAEND`, parsing yields the same
      `$DATASTART` / `$DATAEND` as the untouched metadata;
    * an unknown `$DATATYPE` raises `ValueError`;
    * each recognized `$BYTEORD` maps to the expected `$ENDIAN`, and an
      unknown `$BYTEORD` raises `ValueError`.
    """
    path = fcsparser.test_sample_path
    # Only the metadata is needed; the parsed events are discarded. Binding
    # them to `_` avoids shadowing the module-level `data` helper.
    meta, _ = fcsparser.parse(path, reformat_meta=True, channel_naming="$PnN")

    # Move the data offsets out of the header segment into the text keywords.
    meta_bad = copy.deepcopy(meta)
    meta_bad["$DATASTART"] = meta_bad["__header__"]["data start"]
    meta_bad["$DATAEND"] = meta_bad["__header__"]["data end"]
    meta_bad["__header__"]["data start"] = 0
    meta_bad["__header__"]["data end"] = 0
    for key in ("$DATASTART", "$DATAEND"):
        assert (
            scprep.io.fcs._parse_fcs_header(meta_bad)[key]
            == scprep.io.fcs._parse_fcs_header(meta)[key]
        )

    # Unknown $DATATYPE is rejected.
    meta_bad = copy.deepcopy(meta)
    meta_bad["$DATATYPE"] = "invalid"
    utils.assert_raises_message(
        ValueError,
        "Expected $DATATYPE in ['F', 'D']. " "Got 'invalid'",
        scprep.io.fcs._parse_fcs_header,
        meta_bad,
    )

    # Recognized byte orders map onto the matching endianness marker.
    meta_bad = copy.deepcopy(meta)
    for byteord, endian in zip(["4,3,2,1", "1,2,3,4"], [">", "<"]):
        meta_bad["$BYTEORD"] = byteord
        assert scprep.io.fcs._parse_fcs_header(meta_bad)["$ENDIAN"] == endian

    # Unknown $BYTEORD is rejected.
    meta_bad["$BYTEORD"] = "invalid"
    utils.assert_raises_message(
        ValueError,
        "Expected $BYTEORD in ['1,2,3,4', '4,3,2,1']. " "Got 'invalid'",
        scprep.io.fcs._parse_fcs_header,
        meta_bad,
    )
def test_10X():
    """Exercise 10X directory loading via `data.load_10X` and `scprep.io.load_10X`.

    Checks shape, sparsity and the first gene label for each `gene_labels`
    mode, compares the labels of the `test_10X_cellranger3` directory with
    the reference, and checks the errors raised for an unknown `gene_labels`
    value, a path that is not a directory, and a directory without the
    expected files.
    """
    # Default load: sparse frame, first column is "Arl8b".
    X = data.load_10X()
    assert X.shape == (100, 100)
    assert scprep.utils.is_sparse_dataframe(X)
    assert X.columns[0] == "Arl8b"

    # Gene IDs as labels, dense output.
    X = data.load_10X(gene_labels="id", sparse=False)
    assert X.shape == (100, 100)
    assert isinstance(X, pd.DataFrame)
    assert not scprep.utils.is_sparse_dataframe(X)
    assert X.columns[0] == "ENSMUSG00000030105"

    # Combined "symbol (id)" labels.
    X = data.load_10X(gene_labels="both")
    assert X.shape == (100, 100)
    assert scprep.utils.is_sparse_dataframe(X)
    assert X.columns[0] == "Arl8b (ENSMUSG00000030105)"

    # The cellranger3 directory must yield the same row and column labels.
    X_cellranger3 = scprep.io.load_10X(
        os.path.join(data.data_dir, "test_10X_cellranger3"),
        gene_labels="both",
    )
    np.testing.assert_array_equal(X.index, X_cellranger3.index)
    np.testing.assert_array_equal(X.columns, X_cellranger3.columns)

    # Unknown gene_labels value is rejected.
    utils.assert_raises_message(
        ValueError,
        "gene_labels='invalid' not recognized. "
        "Choose from ['symbol', 'id', 'both']",
        data.load_10X,
        gene_labels="invalid",
    )

    # A zip file path is not a directory.
    zip_path = os.path.join(data.data_dir, "test_10X.zip")
    utils.assert_raises_message(
        FileNotFoundError,
        "{} is not a directory".format(zip_path),
        scprep.io.load_10X,
        zip_path,
    )

    # `data.data_dir` itself is expected to fail the required-files check.
    utils.assert_raises_message(
        FileNotFoundError,
        "'matrix.mtx(.gz)', '[genes/features].tsv(.gz)', and 'barcodes.tsv(.gz)' must be present "
        "in {}".format(data.data_dir),
        scprep.io.load_10X,
        data.data_dir,
    )
def test_csv_and_tsv():
    """Exercise `scprep.io.load_csv` and `scprep.io.load_tsv` against the 10X reference.

    Loads `test_small.csv` with several combinations of `gene_names`,
    `cell_names`, `cell_axis` and pandas pass-through arguments, plus
    `test_small.tsv`, and checks every result matches the reference data and
    labels. Also checks the duplicate-cell-name warning, sparse output,
    de-duplicated gene names, and the error for an unknown `cell_axis`.
    """
    X = data.load_10X()
    filename = os.path.join(data.data_dir, "test_small.csv")

    X_csv = scprep.io.load_csv(filename, gene_names=True, cell_names=True)

    # One name per row (cell), with 0 repeated to trigger the warning.
    # Sized from shape[0] so the list stays valid for a non-square fixture.
    duplicated_cell_names = [0] + list(range(X_csv.shape[0] - 1))
    with utils.assert_warns_message(
        RuntimeWarning,
        "Duplicate cell names detected! Some functions may not work as intended. "
        "You can fix this by running `scprep.sanitize.check_index(data)`.",
    ):
        scprep.io.load_csv(
            filename,
            gene_names=True,
            cell_names=duplicated_cell_names,
        )

    # Equivalent ways of loading the same labelled table.
    X_csv2 = scprep.io.load_csv(
        filename,
        gene_names=True,
        cell_names=None,
        index_col=0,
    )
    X_csv3 = scprep.io.load_csv(
        filename,
        gene_names=None,
        cell_names=True,
        header=0,
    )
    X_csv4 = scprep.io.load_csv(
        filename,
        gene_names=True,
        cell_names=True,
        cell_axis="col",
    )
    X_tsv = scprep.io.load_tsv(os.path.join(data.data_dir, "test_small.tsv"))

    assert np.sum(np.sum(X != X_csv)) == 0
    assert np.sum(np.sum(X_csv != X_csv2)) == 0
    assert np.sum(np.sum(X_csv != X_csv3)) == 0
    # Loading with cell_axis="col" gives the transpose of the default load.
    assert np.sum(np.sum(X_csv != X_csv4.T)) == 0
    assert np.sum(np.sum(X_csv != X_tsv)) == 0
    np.testing.assert_array_equal(X.columns, X_csv.columns)
    np.testing.assert_array_equal(X.index, X_csv.index)
    np.testing.assert_array_equal(X_csv.columns, X_csv2.columns)
    np.testing.assert_array_equal(X_csv.index, X_csv2.index)
    np.testing.assert_array_equal(X_csv.columns, X_csv3.columns)
    np.testing.assert_array_equal(X_csv.index, X_csv3.index)
    np.testing.assert_array_equal(X_csv.columns, X_csv4.index)
    np.testing.assert_array_equal(X_csv.index, X_csv4.columns)
    assert isinstance(X_csv, pd.DataFrame)
    assert not scprep.utils.is_sparse_dataframe(X_csv)

    # Names read from separate files; the CSV's own header row and first
    # column are skipped via skiprows / usecols.
    X_csv = scprep.io.load_csv(
        filename,
        gene_names=os.path.join(data.data_dir, "gene_symbols.csv"),
        cell_names=os.path.join(data.data_dir, "barcodes.tsv"),
        skiprows=1,
        usecols=range(1, 101),
    )
    assert np.sum(np.sum(X != X_csv)) == 0
    np.testing.assert_array_equal(X.columns, X_csv.columns)
    np.testing.assert_array_equal(X.index, X_csv.index)
    assert isinstance(X_csv, pd.DataFrame)
    assert not scprep.utils.is_sparse_dataframe(X_csv)

    # Names supplied in memory from the reference frame.
    X_csv = scprep.io.load_csv(
        filename,
        gene_names=X.columns,
        cell_names=X.index,
        skiprows=1,
        usecols=range(1, 101),
    )
    assert np.sum(np.sum(X != X_csv)) == 0
    np.testing.assert_array_equal(X.columns, X_csv.columns)
    np.testing.assert_array_equal(X.index, X_csv.index)
    assert isinstance(X_csv, pd.DataFrame)
    assert not scprep.utils.is_sparse_dataframe(X_csv)

    # No names, sparse output.
    X_csv = scprep.io.load_csv(
        filename,
        gene_names=None,
        cell_names=None,
        sparse=True,
        skiprows=1,
        usecols=range(1, 101),
    )
    assert np.sum(np.sum(X.to_numpy() != X_csv.to_numpy())) == 0
    assert scprep.utils.is_sparse_dataframe(X_csv)

    # Duplicate gene names in the file come back with a ".1" suffix.
    X_csv = scprep.io.load_csv(
        os.path.join(data.data_dir, "test_small_duplicate_gene_names.csv")
    )
    assert "DUPLICATE" in X_csv.columns
    assert "DUPLICATE.1" in X_csv.columns

    # Unknown cell_axis value is rejected.
    utils.assert_raises_message(
        ValueError,
        "cell_axis neither not recognized. " "Expected 'row' or 'column'",
        scprep.io.load_csv,
        filename,
        cell_axis="neither",
    )