def test_readwrite_zarr(typ, tmp_path):
    """Write an AnnData object to a zarr store, read it back, and compare.

    ``typ`` wraps the module-level ``X_list`` to build ``X``; the store is
    written to ``test_zarr_dir`` underneath ``tmp_path``.
    """
    store_path = tmp_path / "test_zarr_dir"

    adata_src = ad.AnnData(typ(X_list), obs=obs_dict, var=var_dict, uns=uns_dict)
    adata_src.raw = adata_src
    # The in-memory source must not already hold "oanno1" as a categorical.
    assert not is_categorical_dtype(adata_src.obs["oanno1"])
    adata_src.write_zarr(store_path, chunks=True)

    adata = ad.read_zarr(store_path)

    # Expected dtypes, index and categories of obs / raw.var after reading.
    read_obs = adata.obs
    assert is_categorical_dtype(read_obs["oanno1"])
    assert not is_categorical_dtype(read_obs["oanno2"])
    assert read_obs.index.tolist() == ["name1", "name2", "name3"]
    assert read_obs["oanno1"].cat.categories.tolist() == ["cat1", "cat2"]
    assert read_obs["oanno1c"].cat.categories.tolist() == ["cat1"]
    assert is_categorical_dtype(adata.raw.var["vanno2"])

    # Annotation frames must match the source.
    pd.testing.assert_frame_equal(adata.obs, adata_src.obs)
    pd.testing.assert_frame_equal(adata.var, adata_src.var)
    assert np.all(adata.var.index == adata_src.var.index)
    assert adata.var.index.dtype == adata_src.var.index.dtype

    # The raw slot keeps its container type, values and var annotations.
    assert type(adata.raw.X) is type(adata_src.raw.X)
    assert np.allclose(asarray(adata.raw.X), asarray(adata_src.raw.X))
    assert np.all(adata.raw.var == adata_src.raw.var)

    # Nested uns entries: "a" is an integer on both sides, "c" keeps its type.
    for obj in (adata, adata_src):
        assert isinstance(obj.uns["uns4"]["a"], (int, np.integer))
    assert type(adata.uns["uns4"]["c"]) is type(adata_src.uns["uns4"]["c"])

    assert_equal(adata, adata_src)
def test_inplace_subset_obs(matrix_type, subset_func):
    """Check ``_inplace_subset_obs`` against a copy of the equivalent obs view.

    An object built by ``gen_adata`` is subset along obs in two ways: by
    copying the view ``orig[subset_idx, :]`` and by calling
    ``_inplace_subset_obs`` on a copy. Both results must agree exactly.
    """
    orig = gen_adata((30, 30), X_type=matrix_type)
    subset_idx = subset_func(orig.obs_names)

    inplace = orig.copy()
    expected = orig[subset_idx, :].copy()
    inplace._inplace_subset_obs(subset_idx)

    assert_equal(asarray(expected.X), asarray(inplace.X), exact=True)
    for frame_name in ("obs", "var"):
        assert_equal(
            getattr(expected, frame_name), getattr(inplace, frame_name), exact=True
        )

    for key in expected.obsm:
        assert_equal(
            asarray(expected.obsm[key]), asarray(inplace.obsm[key]), exact=True
        )

    # varm is compared against both the view copy and the untouched original.
    for key in expected.varm:
        for reference in (expected, orig):
            assert_equal(
                asarray(reference.varm[key]), asarray(inplace.varm[key]), exact=True
            )

    for key in expected.layers:
        assert_equal(
            asarray(expected.layers[key]), asarray(inplace.layers[key]), exact=True
        )
def test_readwrite_h5ad(typ, dataset_kwargs, backing_h5ad):
    """Round-trip an AnnData object through two h5ad files and compare.

    The source object is written to ``backing_h5ad``, read back, written again
    to an intermediate ``mid.h5ad`` file and read once more; the final object
    is compared with the source. ``dataset_kwargs`` is forwarded to both
    ``write`` calls.
    """
    # Use the temporary directory as a context manager so it is removed
    # deterministically, even when one of the assertions below fails.
    with tempfile.TemporaryDirectory() as tmpdir:
        mid_pth = Path(tmpdir) / "mid.h5ad"

        X = typ(X_list)
        adata_src = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict)
        # The in-memory source must not already hold "oanno1" as categorical.
        assert not is_categorical_dtype(adata_src.obs["oanno1"])
        adata_src.raw = adata_src
        adata_src.write(backing_h5ad, **dataset_kwargs)

        adata_mid = ad.read(backing_h5ad)
        adata_mid.write(mid_pth, **dataset_kwargs)

        adata = ad.read_h5ad(mid_pth)

        # Expected dtypes, index and categories after the double round trip.
        assert is_categorical_dtype(adata.obs["oanno1"])
        assert not is_categorical_dtype(adata.obs["oanno2"])
        assert adata.obs.index.tolist() == ["name1", "name2", "name3"]
        assert adata.obs["oanno1"].cat.categories.tolist() == ["cat1", "cat2"]
        assert is_categorical_dtype(adata.raw.var["vanno2"])

        # Annotation frames must match the source.
        assert np.all(adata.obs == adata_src.obs)
        assert np.all(adata.var == adata_src.var)
        assert np.all(adata.var.index == adata_src.var.index)
        assert adata.var.index.dtype == adata_src.var.index.dtype

        # The raw slot keeps its container types, values and var annotations.
        assert type(adata.raw.X) is type(adata_src.raw.X)
        assert type(adata.raw.varm) is type(adata_src.raw.varm)
        assert np.allclose(asarray(adata.raw.X), asarray(adata_src.raw.X))
        assert np.all(adata.raw.var == adata_src.raw.var)

        # Nested uns entries: "a" is an integer on both sides, "c" keeps its type.
        assert isinstance(adata.uns["uns4"]["a"], (int, np.integer))
        assert isinstance(adata_src.uns["uns4"]["a"], (int, np.integer))
        assert type(adata.uns["uns4"]["c"]) is type(adata_src.uns["uns4"]["c"])

        assert_equal(adata, adata_src)
def test_double_index(subset_func, subset_func2):
    """Indexing both axes at once must equal indexing obs first, then var."""
    adata = gen_adata((10, 10))
    obs_subset = subset_func(adata.obs_names)
    var_subset = subset_func2(adata.var_names)

    joint = adata[obs_subset, var_subset]
    chained = adata[obs_subset, :][:, var_subset]

    x_matches = asarray(joint.X) == asarray(chained.X)
    assert np.all(x_matches)
    assert np.all(joint.obs == chained.obs)
    assert np.all(joint.var == chained.var)
def test_view_of_view_modification():
    """Assigning ``X`` on a view of a view must write through to the parent.

    The same two assignments are exercised twice: first with a dense ``X``
    and then after replacing ``X`` with a ``csr_matrix``.
    """
    adata = ad.AnnData(np.zeros((10, 10)))

    # Dense parent.
    row_vals = np.ones(5)
    adata[0, :][:, 5:].X = np.ones(5)
    assert np.all(adata.X[0, 5:] == row_vals)

    block_vals = np.ones((2, 2))
    adata[[1, 2], :][:, [1, 2]].X = np.ones((2, 2))
    assert np.all(adata.X[1:3, 1:3] == block_vals)

    # Sparse parent; doubled values tell this pass apart from the first one.
    adata.X = sparse.csr_matrix(adata.X)

    adata[0, :][:, 5:].X = np.ones(5) * 2
    first_row_tail = asarray(adata.X)[0, 5:]
    assert np.all(first_row_tail == np.ones(5) * 2)

    adata[[1, 2], :][:, [1, 2]].X = np.ones((2, 2)) * 2
    inner_block = asarray(adata.X)[1:3, 1:3]
    assert np.all(inner_block == np.ones((2, 2)) * 2)
def test_set_scalar_subset_X(matrix_type, subset_func):
    """Setting ``X`` of an obs view to a scalar must update the parent.

    After the assignment the subset is still a view, the selected rows of the
    parent all equal 1, and the number of changed entries in the parent equals
    the number of elements in the subset.
    """
    # NOTE(review): ``subset_func`` is accepted but not used; the index
    # always comes from ``slice_subset``.
    adata = ad.AnnData(matrix_type(np.zeros((10, 10))))
    x_before = adata.X.copy()
    subset_idx = slice_subset(adata.obs_names)

    view = adata[subset_idx, :]
    view.X = 1

    assert view.is_view
    assert np.all(asarray(adata[subset_idx, :].X) == 1)

    n_changed = asarray((x_before != adata.X)).sum()
    assert n_changed == mul(*view.shape)
def test_read_write_X(tmp_path, mtx_format, backed_mode, force_dense):
    """Write ``X`` from a backed object with ``as_dense=["X"]`` and compare.

    The original object is written to ``orig.h5ad``, reopened in
    ``backed_mode``, written to ``backed.h5ad`` and read back; the resulting
    ``X`` must equal the original element-wise.
    """
    # NOTE(review): ``force_dense`` is accepted but not used in this body.
    root = Path(tmp_path)
    orig_pth = root / "orig.h5ad"
    backed_pth = root / "backed.h5ad"

    random_x = asarray(sparse.random(10, 10, format="csr"))
    orig = ad.AnnData(mtx_format(random_x))
    orig.write(orig_pth)

    backed = ad.read(orig_pth, backed=backed_mode)
    backed.write(backed_pth, as_dense=["X"])
    backed.file.close()

    from_backed = ad.read(backed_pth)
    same = asarray(orig.X) == asarray(from_backed.X)
    assert np.all(same)
def test_assigmnent_dict(adata):
    """Assign plain dicts to ``obsp`` and ``varp`` and read the values back.

    Each dict holds a labelled DataFrame, a zero ndarray and a random CSR
    matrix, sized by the module-level ``M`` (obsp) and ``N`` (varp).
    """
    d_obsp = {
        "a": pd.DataFrame(
            np.ones((M, M)), columns=adata.obs_names, index=adata.obs_names
        ),
        "b": np.zeros((M, M)),
        "c": sparse.random(M, M, format="csr"),
    }
    d_varp = {
        "a": pd.DataFrame(
            np.ones((N, N)), columns=adata.var_names, index=adata.var_names
        ),
        "b": np.zeros((N, N)),
        "c": sparse.random(N, N, format="csr"),
    }

    adata.obsp = d_obsp
    for key, expected in d_obsp.items():
        stored = asarray(adata.obsp[key])
        assert np.all(stored == asarray(expected))

    adata.varp = d_varp
    for key, expected in d_varp.items():
        stored = asarray(adata.varp[key])
        assert np.all(stored == asarray(expected))
def assert_equal_ndarray(a, b, exact=False, elem_name=None):
    """Assert that array ``a`` equals ``b``.

    ``b`` is first passed through ``asarray``. The comparison then depends on
    ``exact`` and the dtypes:

    * not ``exact`` and both numeric: shapes must match and values must be
      close (``np.allclose`` with ``equal_nan=True``);
    * not ``exact`` and both have a multi-field structured dtype: both are
      converted to ``pd.DataFrame`` and compared through ``assert_equal``;
    * otherwise: element-wise ``==`` must hold everywhere.

    ``elem_name`` is passed to ``format_msg`` to build the assertion message.
    """
    b = asarray(b)
    if not exact and is_numeric_dtype(a) and is_numeric_dtype(b):
        assert a.shape == b.shape, format_msg(elem_name)
        assert np.allclose(a, b, equal_nan=True), format_msg(elem_name)
    elif (  # Structured dtype
        not exact
        and hasattr(a, "dtype")
        and hasattr(b, "dtype")
        and len(a.dtype) > 1
        # Same threshold as for ``a``: the original tested ``> 0`` here, which
        # sent a multi-field ``a`` paired with a single-field ``b`` down the
        # DataFrame path.
        and len(b.dtype) > 1
    ):
        assert_equal(pd.DataFrame(a), pd.DataFrame(b), exact, elem_name)
    else:
        assert np.all(a == b), format_msg(elem_name)
def test_not_set_subset_X(matrix_type, subset_func):
    """Item assignment into a view's ``X`` must not touch the parent.

    Writing into ``subset.X`` is expected to turn the view into a non-view,
    while the parent's ``X`` and its ``joblib`` hash stay unchanged.
    """
    adata = ad.AnnData(matrix_type(asarray(sparse.random(20, 20))))
    init_hash = joblib.hash(adata)
    x_before = adata.X.copy()

    # Redraw the index until the obs subset has more than two rows.
    subset_idx = slice_subset(adata.obs_names)
    while len(adata[subset_idx, :]) <= 2:
        subset_idx = slice_subset(adata.obs_names)

    # NOTE(review): this obs view is immediately replaced by the var view
    # below; kept as in the original — confirm whether it is intentional.
    subset = adata[subset_idx, :]
    subset = adata[:, subset_idx]

    column_choice = subset_func(np.arange(subset.X.shape[1]))
    internal_idx = _normalize_index(column_choice, subset.var_names)

    assert subset.is_view
    subset.X[:, internal_idx] = 1
    assert not subset.is_view

    changed = asarray(adata.X != x_before)
    assert not np.any(changed)
    assert init_hash == joblib.hash(adata)
def test_inplace_subset_var(matrix_type, subset_func):
    """Check ``_inplace_subset_var`` against a copy of the equivalent var view.

    An object built by ``gen_adata`` is subset along var in two ways: by
    copying the view ``orig[:, subset_idx]`` and by calling
    ``_inplace_subset_var`` on a copy. Both results must agree.
    """
    orig = gen_adata((30, 30), X_type=matrix_type)
    subset_idx = subset_func(orig.var_names)

    inplace = orig.copy()
    expected = orig[:, subset_idx].copy()
    inplace._inplace_subset_var(subset_idx)

    assert_array_equal(asarray(expected.X), asarray(inplace.X))
    for frame_name in ("obs", "var"):
        assert_array_equal(getattr(expected, frame_name), getattr(inplace, frame_name))

    # obsm is compared against both the view copy and the untouched original.
    for key in expected.obsm:
        for reference in (expected, orig):
            assert_array_equal(
                asarray(reference.obsm[key]), asarray(inplace.obsm[key])
            )

    for key in expected.varm:
        assert_array_equal(asarray(expected.varm[key]), asarray(inplace.varm[key]))

    for key in expected.layers:
        assert_array_equal(
            asarray(expected.layers[key]), asarray(inplace.layers[key])
        )
def test_concatenate_layers_outer(array_type, fill_val):
    """Outer-concatenate an object with layer "a" and one without it.

    The rows coming from the object without the layer must hold ``fill_val``
    in layer "a" of the result.
    """
    # Testing that issue #368 is fixed
    shape = (10, 20)

    with_layer = AnnData(
        X=np.ones(shape),
        layers={"a": array_type(sparse.random(*shape, format="csr"))},
    )
    without_layer = AnnData(X=np.ones(shape))

    joined = with_layer.concatenate(
        without_layer,
        join="outer",
        fill_value=fill_val,
        batch_categories=["a", "b"],
    )

    # Batch "b" is the object that had no layer "a".
    from_b = joined[joined.obs["batch"] == "b"]
    np.testing.assert_array_equal(asarray(from_b.layers["a"]), fill_val)
def test_modify_view_component(matrix_type, mapping_name):
    """Writing into a mapping element of a view must not modify the parent.

    ``mapping_name`` selects the AnnData attribute that holds element "m".
    After the write the subset is expected to no longer be a view, to carry
    the new value, and the parent's ``joblib`` hash must be unchanged.
    """
    element = matrix_type(asarray(sparse.random(10, 10)))
    mapping_kwargs = {mapping_name: {"m": element}}
    adata = ad.AnnData(np.zeros((10, 10)), **mapping_kwargs)
    init_hash = joblib.hash(adata)

    subset = adata[:5, :][:, :5]
    assert subset.is_view

    view_elem = getattr(subset, mapping_name)["m"]
    view_elem[0, 0] = 100

    assert not subset.is_view
    # Look the mapping up again instead of reusing ``view_elem``.
    assert getattr(subset, mapping_name)["m"][0, 0] == 100

    assert init_hash == joblib.hash(adata)
def test_concatenate_fill_value(fill_val):
    """Outer-concatenate three objects and check originals and fill values.

    For every obs-aligned element (``X``, layers, obsm) of each input, the
    matching rows of the result must start with the original columns and hold
    ``fill_val`` in all remaining columns.
    """

    def get_obs_els(adata):
        # Collect X, every layer and every obsm entry under prefixed keys.
        els = {"X": adata.X}
        els.update((f"layer_{k}", adata.layers[k]) for k in adata.layers)
        els.update((f"obsm_{k}", adata.obsm[k]) for k in adata.obsm)
        return els

    def non_frame_obsm(adata):
        # obsm entries that are not DataFrames, in their original order.
        return [
            (k, v) for k, v in adata.obsm.items() if not isinstance(v, pd.DataFrame)
        ]

    adata1 = gen_adata((10, 10))
    adata1.obsm = dict(non_frame_obsm(adata1))

    # adata2 / adata3 keep only a leading part of each obsm entry's columns.
    adata2 = gen_adata((10, 5))
    adata2.obsm = {k: v[:, : v.shape[1] // 2] for k, v in non_frame_obsm(adata2)}

    adata3 = gen_adata((7, 3))
    adata3.obsm = {k: v[:, : v.shape[1] // 3] for k, v in non_frame_obsm(adata3)}

    joined = adata1.concatenate([adata2, adata3], join="outer", fill_value=fill_val)

    start = 0
    for orig in (adata1, adata2, adata3):
        stop = start + orig.n_obs
        cur_els = get_obs_els(joined[start:stop])
        orig_els = get_obs_els(orig)
        for key, cur_v in cur_els.items():
            # Elements missing from this input count as having zero columns.
            orig_v = orig_els.get(key, sparse.csr_matrix((orig.n_obs, 0)))
            n_orig_cols = orig_v.shape[1]
            assert_equal(cur_v[:, :n_orig_cols], orig_v)
            np.testing.assert_equal(asarray(cur_v[:, n_orig_cols:]), fill_val)
        start = stop
def assert_equal_h5py_dataset(a, b, exact=False, elem_name=None):
    """Compare ``a`` with ``b`` after passing ``a`` through ``asarray``.

    The comparison is delegated to ``assert_equal`` with the operands swapped
    (``b`` first, the converted ``a`` second).
    """
    assert_equal(b, asarray(a), exact, elem_name=elem_name)
def assert_equal_sparse(a, b, exact=False, elem_name=None):
    """Compare ``a`` with ``b`` after passing ``a`` through ``asarray``.

    The comparison is delegated to ``assert_equal`` with the operands swapped
    (``b`` first, the converted ``a`` second).
    """
    assert_equal(b, asarray(a), exact, elem_name=elem_name)
def assert_equal_arrayview(a, b, exact=False, elem_name=None):
    """Pass both operands through ``asarray`` and delegate to ``assert_equal``."""
    arr_a = asarray(a)
    arr_b = asarray(b)
    assert_equal(arr_a, arr_b, exact=exact, elem_name=elem_name)
def test_setter_singular_dim(shape, orig_array_type, new_array_type):
    """Replace ``X`` with an all-ones array of the same shape and read it back."""
    # https://github.com/theislab/anndata/issues/500
    adata = gen_adata(shape, X_type=orig_array_type)

    replacement = new_array_type(np.ones(shape))
    adata.X = replacement

    np.testing.assert_equal(asarray(adata.X), 1)