def test_readwrite_h5ad(typ, dataset_kwargs, backing_h5ad):
    """Round-trip an AnnData object through two h5ad files and compare.

    The source object is written to ``backing_h5ad``, read back, written
    again to a scratch file, and read once more. The final object is then
    compared against the source.

    Parameters
    ----------
    typ
        Callable applied to ``X_list`` to build ``X``.
    dataset_kwargs
        Keyword arguments forwarded to both ``write`` calls.
    backing_h5ad
        Path of the first file that is written.
    """
    # The scratch directory is handled by a context manager so that it is
    # removed deterministically, including when an assertion below fails.
    with tempfile.TemporaryDirectory() as tmpdir:
        mid_pth = Path(tmpdir) / "mid.h5ad"

        X = typ(X_list)
        adata_src = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict)
        assert not is_categorical(adata_src.obs["oanno1"])
        adata_src.raw = adata_src
        adata_src.write(backing_h5ad, **dataset_kwargs)

        adata_mid = ad.read(backing_h5ad)
        adata_mid.write(mid_pth, **dataset_kwargs)

        adata = ad.read_h5ad(mid_pth)

        # "oanno1" is expected to come back categorical, "oanno2" is not.
        assert is_categorical(adata.obs["oanno1"])
        assert not is_categorical(adata.obs["oanno2"])
        assert adata.obs.index.tolist() == ["name1", "name2", "name3"]
        assert adata.obs["oanno1"].cat.categories.tolist() == ["cat1", "cat2"]
        assert is_categorical(adata.raw.var["vanno2"])

        # Annotations and their indices must match the source.
        assert np.all(adata.obs == adata_src.obs)
        assert np.all(adata.var == adata_src.var)
        assert np.all(adata.var.index == adata_src.var.index)
        assert adata.var.index.dtype == adata_src.var.index.dtype

        # ``raw`` must keep both its container types and its values.
        assert type(adata.raw.X) is type(adata_src.raw.X)
        assert type(adata.raw.varm) is type(adata_src.raw.varm)
        assert np.allclose(asarray(adata.raw.X), asarray(adata_src.raw.X))
        assert np.all(adata.raw.var == adata_src.raw.var)

        # Nested ``uns`` entries keep their types.
        assert isinstance(adata.uns["uns4"]["a"], (int, np.integer))
        assert isinstance(adata_src.uns["uns4"]["a"], (int, np.integer))
        assert type(adata.uns["uns4"]["c"]) is type(adata_src.uns["uns4"]["c"])

        assert_equal(adata, adata_src)
def test_double_index(subset_func, subset_func2):
    """Indexing both axes at once must match indexing obs first, then var."""
    adata = gen_adata((10, 10))
    row_sel = subset_func(adata.obs_names)
    col_sel = subset_func2(adata.var_names)

    joint = adata[row_sel, col_sel]
    chained = adata[row_sel, :][:, col_sel]

    assert np.all(asarray(joint.X) == asarray(chained.X))
    for attr in ("obs", "var"):
        assert np.all(getattr(joint, attr) == getattr(chained, attr))
def test_scale_array(count_matrix_format, zero_center):
    """
    Check that sc.pp.scale produces the same values for an AnnData object
    and for the bare matrix that object was built from.
    """
    dense_counts = sp.random(100, 200, density=0.3).toarray()
    X = count_matrix_format(dense_counts)

    # Scale through the AnnData wrapper (built from a copy of X) ...
    wrapped = sc.AnnData(X=X.copy(), dtype=np.float64)
    sc.pp.scale(wrapped, zero_center=zero_center)

    # ... and scale the matrix directly, asking for a copy back.
    from_array = sc.pp.scale(X, zero_center=zero_center, copy=True)

    assert np.array_equal(asarray(from_array), asarray(wrapped.X))
def test_view_of_view_modification():
    """Assigning ``X`` on a view of a view is expected to reach the parent.

    The same two assignments are checked with a dense parent ``X`` and then
    with a CSR parent ``X``.
    """
    adata = ad.AnnData(np.zeros((10, 10)))

    # Dense parent.
    ones_row = np.ones(5)
    ones_block = np.ones((2, 2))

    adata[0, :][:, 5:].X = ones_row
    assert np.all(adata.X[0, 5:] == ones_row)

    adata[[1, 2], :][:, [1, 2]].X = ones_block
    assert np.all(adata.X[1:3, 1:3] == ones_block)

    # CSR parent.
    adata.X = sparse.csr_matrix(adata.X)
    twos_row = np.full(5, 2.0)
    twos_block = np.full((2, 2), 2.0)

    adata[0, :][:, 5:].X = twos_row
    assert np.all(asarray(adata.X)[0, 5:] == twos_row)

    adata[[1, 2], :][:, [1, 2]].X = twos_block
    assert np.all(asarray(adata.X)[1:3, 1:3] == twos_block)
def test_set_scalar_subset_X(matrix_type, subset_func):
    """Assigning a scalar to a view's ``X`` must show up in the parent.

    After the assignment the subset must still be a view, and the number of
    changed entries in the parent must equal the size of the subset.

    NOTE(review): ``subset_func`` is accepted but never used here; the
    subset always comes from ``slice_subset``.
    """
    adata = ad.AnnData(matrix_type(np.zeros((10, 10))))
    x_before = adata.X.copy()
    obs_sel = slice_subset(adata.obs_names)

    view = adata[obs_sel, :]
    view.X = 1

    assert view.is_view
    assert np.all(asarray(adata[obs_sel, :].X) == 1)

    n_changed = asarray((x_before != adata.X)).sum()
    assert n_changed == mul(*view.shape)
def test_read_write_X(tmp_path, mtx_format, backed_mode, force_dense):
    """Write a backed AnnData with ``X`` as dense and compare to the original.

    NOTE(review): ``force_dense`` is not referenced anywhere in the body.
    """
    root = Path(tmp_path)
    orig_pth, backed_pth = root / "orig.h5ad", root / "backed.h5ad"

    random_csr = sparse.random(10, 10, format="csr")
    orig = ad.AnnData(mtx_format(asarray(random_csr)))
    orig.write(orig_pth)

    backed = ad.read(orig_pth, backed=backed_mode)
    backed.write(backed_pth, as_dense=["X"])
    # Close the backing file before the rewritten file is read.
    backed.file.close()

    reread = ad.read(backed_pth)
    assert np.all(asarray(orig.X) == asarray(reread.X))
def test_assigmnent_dict(adata):
    """Assigning plain dicts to ``obsp`` / ``varp`` must keep every value."""

    def pairwise_dict(size, names):
        # One DataFrame, one dense array and one CSR matrix per mapping.
        return dict(
            a=pd.DataFrame(np.ones((size, size)), columns=names, index=names),
            b=np.zeros((size, size)),
            c=sparse.random(size, size, format="csr"),
        )

    d_obsp = pairwise_dict(M, adata.obs_names)
    d_varp = pairwise_dict(N, adata.var_names)

    for attr, expected in (("obsp", d_obsp), ("varp", d_varp)):
        setattr(adata, attr, expected)
        for key, value in expected.items():
            assert np.all(asarray(getattr(adata, attr)[key]) == asarray(value))
def test_not_set_subset_X(matrix_type, subset_func):
    """Element assignment into a view's ``X`` must leave the parent untouched.

    After the write the subset must no longer report ``is_view``, the
    parent's ``X`` must equal its saved copy, and the parent's joblib hash
    must be unchanged.
    """
    adata = ad.AnnData(matrix_type(asarray(sparse.random(20, 20))))
    init_hash = joblib.hash(adata)
    orig_X_val = adata.X.copy()

    # Redraw the slice until ``len`` of the row subset exceeds 2.
    while True:
        subset_idx = slice_subset(adata.obs_names)
        if len(adata[subset_idx, :]) > 2:
            break

    # NOTE(review): ``subset_idx`` is drawn from ``obs_names`` but applied to
    # the var axis here (the matrix is 20 x 20) -- confirm this is intended.
    # A row view that used to be built here was overwritten before any use
    # and has been dropped.
    subset = adata[:, subset_idx]

    internal_idx = subset_func(np.arange(subset.X.shape[1]))
    assert subset.is_view
    subset.X[:, internal_idx] = 1
    assert not subset.is_view
    assert not np.any(asarray(adata.X != orig_X_val))

    assert init_hash == joblib.hash(adata)
def test_inplace_subset_obs(matrix_type, subset_func):
    """``_inplace_subset_obs`` must match taking a view and copying it."""

    def same_arrays(left, right):
        assert_array_equal(asarray(left), asarray(right))

    orig = gen_adata((30, 30), X_type=matrix_type)
    subset_idx = subset_func(orig.obs_names)

    modified = orig.copy()
    expected = orig[subset_idx, :].copy()
    modified._inplace_subset_obs(subset_idx)

    same_arrays(expected.X, modified.X)
    assert_array_equal(expected.obs, modified.obs)
    assert_array_equal(expected.var, modified.var)

    for key in expected.obsm:
        same_arrays(expected.obsm[key], modified.obsm[key])
    for key in expected.varm:
        same_arrays(expected.varm[key], modified.varm[key])
        # varm is also compared against the unsubsetted original.
        same_arrays(orig.varm[key], modified.varm[key])
    for key in expected.layers:
        same_arrays(expected.layers[key], modified.layers[key])
def check_rep_mutation(func, X, *, fields=("layer", "obsm"), **kwargs):
    """Check that only the array meant to be modified is modified.

    Parameters
    ----------
    func
        Callable invoked as ``func(adata, copy=True, **kwargs)`` and, for
        each field, ``func(adata, copy=True, <field>=<field>, **kwargs)``.
    X
        Array stored as ``X`` (copied) and under every name in ``fields``.
    fields
        Iterable of names. Each name is used both as the keyword and as its
        value when calling ``func`` and the ``sc.get`` rep helpers.
    **kwargs
        Extra keyword arguments forwarded to every ``func`` call.
    """
    # ``fields`` is iterated several times below; materialise it once so a
    # one-shot iterable works too. The default is an immutable tuple.
    fields = tuple(fields)

    adata = sc.AnnData(X=X.copy(), dtype=X.dtype)
    for field in fields:
        sc.get._set_obs_rep(adata, X, **{field: field})
    X_array = asarray(X)

    adata_X = func(adata, copy=True, **kwargs)
    adatas_proc = {
        field: func(adata, copy=True, **{field: field}, **kwargs)
        for field in fields
    }

    # Modified fields
    for field in fields:
        result_array = asarray(
            sc.get._get_obs_rep(adatas_proc[field], **{field: field})
        )
        np.testing.assert_array_equal(asarray(adata_X.X), result_array)

    # Unmodified fields
    for field in fields:
        np.testing.assert_array_equal(X_array, asarray(adatas_proc[field].X))
        np.testing.assert_array_equal(
            X_array, asarray(sc.get._get_obs_rep(adata_X, **{field: field}))
        )
    # Targeting one field must not touch any of the other fields.
    for field_a, field_b in permutations(fields, 2):
        result_array = asarray(
            sc.get._get_obs_rep(adatas_proc[field_a], **{field_b: field_b})
        )
        np.testing.assert_array_equal(X_array, result_array)
def check_rep_mutation(func, X, **kwargs):
    """Check that only the array meant to be modified is modified."""

    def same(left, right):
        return np.array_equal(asarray(left), asarray(right))

    adata = sc.AnnData(
        X=X.copy(),
        layers={"layer": X.copy()},
        obsm={"obsm": X.copy()},
        dtype=X.dtype,
    )

    on_X = func(adata, copy=True, **kwargs)
    on_layer = func(adata, layer="layer", copy=True, **kwargs)
    on_obsm = func(adata, obsm="obsm", copy=True, **kwargs)

    # The targeted representation must match the result obtained on X.
    assert same(on_X.X, on_layer.layers["layer"])
    assert same(on_X.X, on_obsm.obsm["obsm"])

    # Within each result, the two non-targeted representations must agree.
    assert same(on_layer.X, on_layer.obsm["obsm"])
    assert same(on_obsm.X, on_obsm.layers["layer"])
    assert same(on_X.layers["layer"], on_X.obsm["obsm"])
def test_modify_view_component(matrix_type, mapping_name):
    """Writing into a mapping element of a view must not touch the parent.

    After the write the subset must no longer report ``is_view``, must hold
    the new value, and the parent's joblib hash must be unchanged.
    """
    component = matrix_type(asarray(sparse.random(10, 10)))
    mapping_kwargs = {mapping_name: dict(m=component)}
    adata = ad.AnnData(np.zeros((10, 10)), **mapping_kwargs)
    hash_before = joblib.hash(adata)

    subset = adata[:5, :][:, :5]
    assert subset.is_view

    element = getattr(subset, mapping_name)["m"]
    element[0, 0] = 100

    assert not subset.is_view
    assert getattr(subset, mapping_name)["m"][0, 0] == 100

    assert hash_before == joblib.hash(adata)
def test_readwrite_zarr(typ, tmp_path):
    """Write an AnnData object to a zarr store, read it back and compare."""
    zarr_pth = tmp_path / "test_zarr_dir"

    source = ad.AnnData(typ(X_list), obs=obs_dict, var=var_dict, uns=uns_dict)
    source.raw = source
    assert not is_categorical(source.obs["oanno1"])

    source.write_zarr(zarr_pth, chunks=True)
    loaded = ad.read_zarr(zarr_pth)

    # "oanno1" is expected to come back categorical, "oanno2" is not.
    assert is_categorical(loaded.obs["oanno1"])
    assert not is_categorical(loaded.obs["oanno2"])
    assert loaded.obs.index.tolist() == ["name1", "name2", "name3"]
    assert loaded.obs["oanno1"].cat.categories.tolist() == ["cat1", "cat2"]
    assert is_categorical(loaded.raw.var["vanno2"])

    # Annotations and their indices must match the source.
    assert np.all(loaded.obs == source.obs)
    assert np.all(loaded.var == source.var)
    assert np.all(loaded.var.index == source.var.index)
    assert loaded.var.index.dtype == source.var.index.dtype

    # ``raw`` must keep its container type and its values.
    assert type(loaded.raw.X) is type(source.raw.X)
    assert np.allclose(asarray(loaded.raw.X), asarray(source.raw.X))
    assert np.all(loaded.raw.var == source.raw.var)

    # Nested ``uns`` entries keep their types.
    assert isinstance(loaded.uns["uns4"]["a"], (int, np.integer))
    assert isinstance(source.uns["uns4"]["a"], (int, np.integer))
    assert type(loaded.uns["uns4"]["c"]) is type(source.uns["uns4"]["c"])

    assert_equal(loaded, source)
def test_concatenate_fill_value(fill_val):
    """Outer-join concatenation must pad missing entries with ``fill_val``.

    For each input, the matching rows of the joined object must reproduce
    the input in their leading columns and hold ``fill_val`` in the rest.
    """

    def get_obs_els(adata):
        # Collect X plus every layer and obsm entry under a prefixed key.
        els = {"X": adata.X}
        els.update({f"layer_{k}": adata.layers[k] for k in adata.layers})
        els.update({f"obsm_{k}": adata.obsm[k] for k in adata.obsm})
        return els

    def array_obsm(adata):
        # obsm entries that are not DataFrames.
        return [
            (k, v) for k, v in adata.obsm.items() if not isinstance(v, pd.DataFrame)
        ]

    adata1 = gen_adata((10, 10))
    adata1.obsm = dict(array_obsm(adata1))

    adata2 = gen_adata((10, 5))
    adata2.obsm = {k: v[:, : v.shape[1] // 2] for k, v in array_obsm(adata2)}

    adata3 = gen_adata((7, 3))
    adata3.obsm = {k: v[:, : v.shape[1] // 3] for k, v in array_obsm(adata3)}

    joined = adata1.concatenate([adata2, adata3], join="outer", fill_value=fill_val)

    start = 0
    for orig in (adata1, adata2, adata3):
        stop = start + orig.n_obs
        cur_els = get_obs_els(joined[start:stop])
        orig_els = get_obs_els(orig)
        for key, cur_v in cur_els.items():
            # Elements missing from this input count as zero columns wide.
            orig_v = orig_els.get(key, sparse.csr_matrix((orig.n_obs, 0)))
            width = orig_v.shape[1]
            assert_equal(cur_v[:, :width], orig_v)
            np.testing.assert_equal(asarray(cur_v[:, width:]), fill_val)
        start += orig.n_obs
def test_view_of_view(matrix_type, subset_func, subset_func2):
    """Views of views must reference the root object and match copy chains."""

    def differs(left, right):
        return np.any(asarray(ne(left, right)))

    adata = gen_adata((30, 15), X_type=matrix_type)
    if subset_func is single_subset:
        pytest.xfail("Other subset generating functions have trouble with this")

    # Nested var views must reference the root object.
    var_s1 = subset_func(adata.var_names, min_size=4)
    var_view1 = adata[:, var_s1]
    var_s2 = subset_func2(var_view1.var_names)
    var_view2 = var_view1[:, var_s2]
    assert var_view2._adata_ref is adata

    # The same holds for a chain that mixes obs and var subsetting.
    obs_s1 = subset_func(adata.obs_names, min_size=4)
    obs_view1 = adata[obs_s1, :]
    obs_s2 = subset_func2(obs_view1.obs_names)
    assert adata[obs_s1, :][:, var_s1][obs_s2, :]._adata_ref is adata

    via_copies = adata[:, var_s1].copy()[obs_s1, :].copy()[:, var_s2].copy()
    via_views = adata[:, var_s1][obs_s1, :][:, var_s2].copy()

    # Check equivalence
    assert np.allclose(asarray(via_copies.X), asarray(via_views.X))
    assert not differs(via_copies.obs, via_views.obs)
    assert not differs(via_copies.var, via_views.var)
    for key in adata.obsm.keys():
        assert not differs(via_copies.obsm[key], via_views.obsm[key])
    for key in adata.varm.keys():
        assert not differs(
            asarray(via_copies.varm[key]), asarray(via_views.varm[key])
        )
    for key in adata.layers.keys():
        assert not differs(
            asarray(via_copies.layers[key]), asarray(via_views.layers[key])
        )