示例#1
0
def test_10X_zip_not_a_file():
    """Expect `load_10X_zip` to raise FileNotFoundError for a missing path."""
    missing_zip = "not_a_file.zip"
    expected_message = "No such file: 'not_a_file.zip'"
    utils.assert_raises_message(
        FileNotFoundError, expected_message, scprep.io.load_10X_zip, missing_zip
    )
示例#2
0
def test_10X_zip_url_404():
    """Expect `load_10X_zip` to surface an HTTP 404 for an invalid URL."""
    # NOTE(review): presumably this performs a real HTTP request, so it
    # needs network access - confirm before running offline.
    bad_url = "https://github.com/KrishnaswamyLab/scprep/invalid_url"
    expected_message = "HTTP Error 404: Not Found"
    utils.assert_raises_message(
        urllib.error.HTTPError, expected_message, scprep.io.load_10X_zip, bad_url
    )
示例#3
0
def test_10X_zip_url_not_a_zip():
    """Expect `load_10X_zip` to raise BadZipFile for a non-zip URL payload."""
    # NOTE(review): presumably this performs a real HTTP request, so it
    # needs network access - confirm before running offline.
    non_zip_url = (
        "https://github.com/KrishnaswamyLab/scprep/raw/master/data/test_data/test_10X"
    )
    expected_message = "File is not a zip file"
    utils.assert_raises_message(
        zipfile.BadZipFile, expected_message, scprep.io.load_10X_zip, non_zip_url
    )
示例#4
0
def test_10X_HDF5_invalid_backend():
    """Expect `load_10X_HDF5` to reject an unknown `backend` with ValueError."""
    call_kwargs = {
        "filename": os.path.join(data.data_dir, "test_10X.h5"),
        "backend": "invalid",
    }
    expected_message = "Expected backend in ['tables', 'h5py']. Got invalid"
    utils.assert_raises_message(
        ValueError, expected_message, scprep.io.load_10X_HDF5, **call_kwargs
    )
示例#5
0
def test_10X_HDF5_invalid_gene_labels():
    """Expect `load_10X_HDF5` to reject an unknown `gene_labels` value."""
    expected_message = (
        "gene_labels='invalid' not recognized. "
        "Choose from ['symbol', 'id', 'both']"
    )
    utils.assert_raises_message(
        ValueError,
        expected_message,
        scprep.io.load_10X_HDF5,
        filename=os.path.join(data.data_dir, "test_10X.h5"),
        gene_labels="invalid",
    )
示例#6
0
def test_10X_HDF5_invalid_genome():
    """Expect `load_10X_HDF5` to raise ValueError for a genome not in the file."""
    hdf5_path = os.path.join(data.data_dir, "test_10X.h5")
    # The expected message embeds the path that was passed to the loader.
    message_template = (
        "Genome invalid not found in {}. "
        "Available genomes: GRCh38"
    )
    utils.assert_raises_message(
        ValueError,
        message_template.format(hdf5_path),
        scprep.io.load_10X_HDF5,
        filename=hdf5_path,
        genome="invalid",
    )
示例#7
0
def test_fcs_naming_error():
    """Expect `load_fcs` to reject an unknown `channel_naming` with ValueError."""
    sample_fcs = fcsparser.test_sample_path
    expected_message = (
        "Expected channel_naming in ['$PnS', '$PnN']. "
        "Got 'invalid'"
    )
    loader_options = {"override": True, "channel_naming": "invalid"}
    utils.assert_raises_message(
        ValueError, expected_message, scprep.io.load_fcs, sample_fcs, **loader_options
    )
示例#8
0
def test_fcs_file_error():
    """Expect `load_fcs` to raise RuntimeError when given a CSV file."""
    csv_path = os.path.join(data.data_dir, "test_small.csv")
    # The expected message embeds the path that was passed to the loader.
    message_template = (
        "fcsparser failed to load {}, likely due to"
        " a malformed header. You can try using "
        "`override=True` to use scprep's built-in "
        "experimental FCS parser."
    )
    utils.assert_raises_message(
        RuntimeError,
        message_template.format(csv_path),
        scprep.io.load_fcs,
        csv_path,
    )
示例#9
0
def test_10X_HDF5_genome_cellranger3():
    """Expect NotImplementedError when a genome is requested for the Cellranger 3 fixture."""
    cellranger3_path = os.path.join(data.data_dir, "test_10X_cellranger3.h5")
    expected_message = (
        "Selecting genomes for Cellranger 3.0 files is not "
        "currently supported. Please file an issue at "
        "https://github.com/KrishnaswamyLab/scprep/issues"
    )
    utils.assert_raises_message(
        NotImplementedError,
        expected_message,
        scprep.io.load_10X_HDF5,
        filename=cellranger3_path,
        genome="GRCh38",
    )
示例#10
0
def test_parse_header():
    """Expect `_parse_header` to raise ValueError on a header length mismatch.

    Two header sources are exercised: an in-memory array and a path to a
    CSV file of gene symbols.
    """
    symbols_csv = os.path.join(data.data_dir, "gene_symbols.csv")
    # Each case: (header, expected number of entries, expected error message)
    mismatch_cases = [
        (np.arange(10), 5, "Expected 5 entries in gene_names. Got 10"),
        (
            symbols_csv,
            50,
            "Expected 50 entries in {}. Got 100".format(
                os.path.abspath(symbols_csv)),
        ),
    ]
    for header, n_expected, expected_message in mismatch_cases:
        utils.assert_raises_message(
            ValueError,
            expected_message,
            scprep.io.utils._parse_header,
            header,
            n_expected,
        )
示例#11
0
def test_10X_zip_error():
    """Expect `load_10X_zip` to raise ValueError for bad labels or a bad archive."""
    # Case 1: the test_10X.zip fixture with an unrecognised gene_labels option.
    zip_path = os.path.join(data.data_dir, "test_10X.zip")
    bad_labels_message = (
        "gene_labels='invalid' not recognized. "
        "Choose from ['symbol', 'id', 'both']"
    )
    utils.assert_raises_message(
        ValueError,
        bad_labels_message,
        scprep.io.load_10X_zip,
        zip_path,
        gene_labels="invalid",
    )

    # Case 2: the test_10X_invalid.zip fixture, expected to be rejected
    # with the "single zipped folder" message.
    invalid_zip_path = os.path.join(data.data_dir, "test_10X_invalid.zip")
    bad_layout_message = (
        "Expected a single zipped folder containing 'matrix.mtx(.gz)', "
        "'[genes/features].tsv(.gz)', and 'barcodes.tsv(.gz)'. Got "
    )
    utils.assert_raises_message(
        ValueError,
        bad_layout_message,
        scprep.io.load_10X_zip,
        invalid_zip_path,
    )
示例#12
0
def test_mtx():
    """Compare `scprep.io.load_mtx` output against the 10X reference data.

    Covers gene/cell names given as file paths, as index objects from the
    reference frame, as None with dense output, an invalid `cell_axis`,
    and generated names with the default `cell_axis`.
    """
    reference = data.load_10X()
    mtx_path = os.path.join(data.data_dir, "test_10X", "matrix.mtx.gz")

    # Names read from files first, then taken from the reference frame;
    # both runs must reproduce the reference values and labels.
    name_sources = [
        (
            os.path.join(data.data_dir, "gene_symbols.csv"),
            os.path.join(data.data_dir, "barcodes.tsv"),
        ),
        (reference.columns, reference.index),
    ]
    for gene_names, cell_names in name_sources:
        loaded = scprep.io.load_mtx(
            mtx_path,
            gene_names=gene_names,
            cell_names=cell_names,
            cell_axis="column",
        )
        assert np.sum(np.sum(reference.to_numpy() != loaded.to_numpy())) == 0
        np.testing.assert_array_equal(reference.columns, loaded.columns)
        np.testing.assert_array_equal(reference.index, loaded.index)
        assert scprep.utils.is_sparse_dataframe(loaded)

    # No names and sparse=False: a plain ndarray is expected.
    dense = scprep.io.load_mtx(
        mtx_path,
        gene_names=None,
        cell_names=None,
        sparse=False,
        cell_axis="column",
    )
    assert np.sum(np.sum(reference.to_numpy() != dense)) == 0
    assert isinstance(dense, np.ndarray)

    # An unknown cell_axis is expected to be rejected.
    bad_axis_message = (
        "cell_axis neither not recognized. "
        "Expected 'row' or 'column'"
    )
    utils.assert_raises_message(
        ValueError,
        bad_axis_message,
        scprep.io.load_mtx,
        mtx_path,
        cell_axis="neither",
    )

    # Generated names with the default cell_axis: string gene names and
    # integer cell names must come back as given.
    n_cells, n_genes = reference.shape[0], reference.shape[1]
    relabelled = scprep.io.load_mtx(
        mtx_path,
        gene_names=np.arange(n_genes).astype("str"),
        cell_names=np.arange(n_cells),
    )
    assert relabelled.shape == (100, 100)
    assert scprep.utils.is_sparse_dataframe(relabelled)
    assert relabelled.columns[0] == "0"
    assert relabelled.index[0] == 0
示例#13
0
def test_fcs_header_error():
    """Exercise `scprep.io.fcs._parse_fcs_header` on modified FCS metadata.

    Three scenarios are covered: zeroed header offsets with `$DATASTART` /
    `$DATAEND` supplied in the metadata, an invalid `$DATATYPE`, and
    valid / invalid `$BYTEORD` values.
    """
    path = fcsparser.test_sample_path
    # Only the metadata is needed; the event data is discarded. Binding it
    # to `_` avoids shadowing the module-level `data` helper.
    meta, _ = fcsparser.parse(path,
                              reformat_meta=True,
                              channel_naming="$PnN")

    # Move the data offsets out of the header into the metadata keys; the
    # parsed result must match what the untouched metadata yields.
    meta_bad = copy.deepcopy(meta)
    meta_bad["$DATASTART"] = meta_bad["__header__"]["data start"]
    meta_bad["$DATAEND"] = meta_bad["__header__"]["data end"]
    meta_bad["__header__"]["data start"] = 0
    meta_bad["__header__"]["data end"] = 0
    assert (scprep.io.fcs._parse_fcs_header(meta_bad)["$DATASTART"] ==
            scprep.io.fcs._parse_fcs_header(meta)["$DATASTART"])
    assert (scprep.io.fcs._parse_fcs_header(meta_bad)["$DATAEND"] ==
            scprep.io.fcs._parse_fcs_header(meta)["$DATAEND"])

    # An unrecognised $DATATYPE is expected to raise ValueError.
    meta_bad = copy.deepcopy(meta)
    meta_bad["$DATATYPE"] = "invalid"
    utils.assert_raises_message(
        ValueError,
        "Expected $DATATYPE in ['F', 'D']. "
        "Got 'invalid'",
        scprep.io.fcs._parse_fcs_header,
        meta_bad,
    )

    # Each valid $BYTEORD maps to an endianness marker; anything else is
    # expected to raise ValueError.
    meta_bad = copy.deepcopy(meta)
    for byteord, endian in zip(["4,3,2,1", "1,2,3,4"], [">", "<"]):
        meta_bad["$BYTEORD"] = byteord
        assert scprep.io.fcs._parse_fcs_header(meta_bad)["$ENDIAN"] == endian
    meta_bad["$BYTEORD"] = "invalid"
    utils.assert_raises_message(
        ValueError,
        "Expected $BYTEORD in ['1,2,3,4', '4,3,2,1']. "
        "Got 'invalid'",
        scprep.io.fcs._parse_fcs_header,
        meta_bad,
    )
示例#14
0
def test_10X():
    """Exercise 10X directory loading across `gene_labels` and `sparse` options.

    Also checks that the Cellranger 3 fixture yields the same row and
    column labels, and that invalid arguments or paths raise the
    expected errors.
    """
    # Default options: sparse frame labelled by gene symbol.
    X = data.load_10X()
    assert X.shape == (100, 100)
    assert scprep.utils.is_sparse_dataframe(X)
    assert X.columns[0] == "Arl8b"

    # Gene IDs as labels, dense output.
    X = data.load_10X(gene_labels="id", sparse=False)
    assert X.shape == (100, 100)
    assert isinstance(X, pd.DataFrame)
    assert not scprep.utils.is_sparse_dataframe(X)
    assert X.columns[0] == "ENSMUSG00000030105"

    # Combined "symbol (id)" labels.
    X = data.load_10X(gene_labels="both")
    assert X.shape == (100, 100)
    assert scprep.utils.is_sparse_dataframe(X)
    assert X.columns[0] == "Arl8b (ENSMUSG00000030105)"

    # The Cellranger 3 directory must give the same labels as above.
    # NOTE(review): the index comparison used to appear twice; the
    # redundant copy was dropped. A comparison of the values may have been
    # intended instead - confirm.
    X_cellranger3 = scprep.io.load_10X(os.path.join(data.data_dir,
                                                    "test_10X_cellranger3"),
                                       gene_labels="both")
    np.testing.assert_array_equal(X.index, X_cellranger3.index)
    np.testing.assert_array_equal(X.columns, X_cellranger3.columns)

    # Unknown gene_labels option.
    utils.assert_raises_message(
        ValueError,
        "gene_labels='invalid' not recognized. "
        "Choose from ['symbol', 'id', 'both']",
        data.load_10X,
        gene_labels="invalid",
    )
    # A zip file path passed where a directory is expected.
    utils.assert_raises_message(
        FileNotFoundError,
        "{} is not a directory".format(
            os.path.join(data.data_dir, "test_10X.zip")),
        scprep.io.load_10X,
        os.path.join(data.data_dir, "test_10X.zip"),
    )
    # The data directory itself is expected to be reported as lacking the
    # required 10X files.
    utils.assert_raises_message(
        FileNotFoundError,
        "'matrix.mtx(.gz)', '[genes/features].tsv(.gz)', and 'barcodes.tsv(.gz)' must be present "
        "in {}".format(data.data_dir),
        scprep.io.load_10X,
        data.data_dir,
    )
示例#15
0
def test_csv_and_tsv():
    """Compare `scprep.io.load_csv` / `load_tsv` output with the 10X reference.

    Covers names read from the file, names given as files or index
    objects, a transposed cell axis, sparse output, duplicate gene names,
    the duplicate-cell-name warning, and an invalid `cell_axis`.
    """
    X = data.load_10X()
    filename = os.path.join(data.data_dir, "test_small.csv")
    X_csv = scprep.io.load_csv(filename,
                               gene_names=True,
                               cell_names=True)

    # Supplying a repeated cell name must trigger the duplicate warning.
    # Cell names label rows under the default cell_axis, so the list is
    # sized from the row count (the fixture is square, so the length is
    # the same as before).
    with utils.assert_warns_message(
            RuntimeWarning,
            "Duplicate cell names detected! Some functions may not work as intended. "
            "You can fix this by running `scprep.sanitize.check_index(data)`.",
    ):
        scprep.io.load_csv(
            filename,
            gene_names=True,
            cell_names=[0] + list(range(X_csv.shape[0] - 1)),
        )

    # Alternative ways of obtaining the same labelled frame.
    X_csv2 = scprep.io.load_csv(
        filename,
        gene_names=True,
        cell_names=None,
        index_col=0,
    )
    X_csv3 = scprep.io.load_csv(
        filename,
        gene_names=None,
        cell_names=True,
        header=0,
    )
    X_csv4 = scprep.io.load_csv(
        filename,
        gene_names=True,
        cell_names=True,
        cell_axis="col",
    )
    X_tsv = scprep.io.load_tsv(os.path.join(data.data_dir, "test_small.tsv"))
    assert np.sum(np.sum(X != X_csv)) == 0
    assert np.sum(np.sum(X_csv != X_csv2)) == 0
    assert np.sum(np.sum(X_csv != X_csv3)) == 0
    # X_csv4 was loaded with cell_axis="col", so it is compared transposed.
    assert np.sum(np.sum(X_csv != X_csv4.T)) == 0
    assert np.sum(np.sum(X_csv != X_tsv)) == 0
    np.testing.assert_array_equal(X.columns, X_csv.columns)
    np.testing.assert_array_equal(X.index, X_csv.index)
    np.testing.assert_array_equal(X_csv.columns, X_csv2.columns)
    np.testing.assert_array_equal(X_csv.index, X_csv2.index)
    np.testing.assert_array_equal(X_csv.columns, X_csv3.columns)
    np.testing.assert_array_equal(X_csv.index, X_csv3.index)
    np.testing.assert_array_equal(X_csv.columns, X_csv4.index)
    np.testing.assert_array_equal(X_csv.index, X_csv4.columns)
    assert isinstance(X_csv, pd.DataFrame)
    assert not scprep.utils.is_sparse_dataframe(X_csv)

    # Names supplied as separate files; the CSV's own header row and
    # first column are skipped.
    X_csv = scprep.io.load_csv(
        filename,
        gene_names=os.path.join(data.data_dir, "gene_symbols.csv"),
        cell_names=os.path.join(data.data_dir, "barcodes.tsv"),
        skiprows=1,
        usecols=range(1, 101),
    )
    assert np.sum(np.sum(X != X_csv)) == 0
    np.testing.assert_array_equal(X.columns, X_csv.columns)
    np.testing.assert_array_equal(X.index, X_csv.index)
    assert isinstance(X_csv, pd.DataFrame)
    assert not scprep.utils.is_sparse_dataframe(X_csv)

    # Names supplied as index objects from the reference frame.
    X_csv = scprep.io.load_csv(
        filename,
        gene_names=X.columns,
        cell_names=X.index,
        skiprows=1,
        usecols=range(1, 101),
    )
    assert np.sum(np.sum(X != X_csv)) == 0
    np.testing.assert_array_equal(X.columns, X_csv.columns)
    np.testing.assert_array_equal(X.index, X_csv.index)
    assert isinstance(X_csv, pd.DataFrame)
    assert not scprep.utils.is_sparse_dataframe(X_csv)

    # No names, sparse output: only the values are compared.
    X_csv = scprep.io.load_csv(
        filename,
        gene_names=None,
        cell_names=None,
        sparse=True,
        skiprows=1,
        usecols=range(1, 101),
    )
    assert np.sum(np.sum(X.to_numpy() != X_csv.to_numpy())) == 0
    assert scprep.utils.is_sparse_dataframe(X_csv)

    # Duplicate gene names in the file are expected to come back
    # disambiguated with a ".1" suffix.
    X_csv = scprep.io.load_csv(
        os.path.join(data.data_dir, "test_small_duplicate_gene_names.csv"))
    assert "DUPLICATE" in X_csv.columns
    assert "DUPLICATE.1" in X_csv.columns

    # An unknown cell_axis is expected to be rejected.
    utils.assert_raises_message(
        ValueError,
        "cell_axis neither not recognized. "
        "Expected 'row' or 'column'",
        scprep.io.load_csv,
        filename,
        cell_axis="neither",
    )