def test_view_delattr(attr, subset_func):
    """Check that deleting ``attr`` on a view detaches it from its parent.

    After ``delattr`` the subset must no longer be a view, the deleted
    attribute must compare equal to the one on an AnnData built only from
    column-less ``obs``/``var`` frames, and the parent's hash must be unchanged.
    """
    parent = gen_adata((10, 10))
    parent_hash = joblib.hash(parent)

    obs_sel = subset_func(parent.obs_names)
    var_sel = subset_func(parent.var_names)
    view = parent[obs_sel, var_sel]

    # Reference object carrying only the indices of the view, no columns.
    blank = ad.AnnData(obs=view.obs[[]], var=view.var[[]])

    delattr(view, attr)

    assert not view.is_view
    # The deleted attribute should now match the default value.
    assert_equal(getattr(view, attr), getattr(blank, attr))
    # The parent object must not have been modified.
    assert parent_hash == joblib.hash(parent)
def test_backed_raw_subset(tmp_path, subset_func, subset_func2):
    """Compare a view of a backed AnnData (with ``.raw``) to its in-memory twin.

    The same obs/var selection is applied to an in-memory object and to the
    object read back in backed mode; the backed view is then written out,
    re-read and compared again, and finally loaded with ``to_memory()``.
    """
    backed_file = tmp_path / "backed.h5ad"
    final_file = tmp_path / "final.h5ad"

    in_memory = gen_adata((10, 10))
    in_memory.raw = in_memory

    obs_idx = subset_func(in_memory.obs_names)
    var_idx = subset_func2(in_memory.var_names)

    in_memory.write(backed_file)

    on_disk = ad.read_h5ad(backed_file, backed="r")
    backed_v = on_disk[obs_idx, var_idx]
    assert backed_v.is_view

    mem_v = in_memory[obs_idx, var_idx]
    # Is this meaningful, given the objects are not equivalent?
    assert_equal(backed_v, mem_v)

    backed_v.write_h5ad(final_file)
    final_adata = ad.read_h5ad(final_file)
    # TODO: figure out why this does not work without the copy.
    assert_equal(final_adata, mem_v.copy())

    # TODO: breaks when removing this line, because backed_v.X is not accessible.
    backed_v = ad.read_h5ad(backed_file, backed="r")[obs_idx, var_idx]
    # .raw is dropped when loading backed into memory.
    del final_adata.raw

    # Check loading the backed view into memory.
    assert_equal(final_adata, backed_v.to_memory())
def test_inplace_subset_obs(matrix_type, subset_func):
    """Check ``_inplace_subset_obs`` against copying an obs-subset view.

    ``X``, ``obs``, ``var`` and every ``obsm``/``varm``/``layers`` entry of the
    in-place result must exactly equal those of the copied view; ``varm`` must
    additionally still equal the entries of the unsubsetted original.
    """
    original = gen_adata((30, 30), X_type=matrix_type)
    obs_sel = subset_func(original.obs_names)

    inplace = original.copy()
    expected = original[obs_sel, :].copy()
    inplace._inplace_subset_obs(obs_sel)

    assert_equal(asarray(expected.X), asarray(inplace.X), exact=True)
    assert_equal(expected.obs, inplace.obs, exact=True)
    assert_equal(expected.var, inplace.var, exact=True)

    for key in expected.obsm:
        assert_equal(
            asarray(expected.obsm[key]), asarray(inplace.obsm[key]), exact=True
        )

    for key in expected.varm:
        assert_equal(
            asarray(expected.varm[key]), asarray(inplace.varm[key]), exact=True
        )
        # varm is also compared against the unsubsetted original.
        assert_equal(
            asarray(original.varm[key]), asarray(inplace.varm[key]), exact=True
        )

    for key in expected.layers:
        assert_equal(
            asarray(expected.layers[key]), asarray(inplace.layers[key]), exact=True
        )
def test_view_setattr_machinery(attr, subset_func, subset_func2):
    """Check that assigning an attribute on a view does not corrupt it.

    The value of ``attr`` taken from a copy of the view is assigned back onto
    the view; afterwards the view must exactly equal that copy.
    """
    adata = gen_adata((10, 10))
    obs_sel = subset_func(adata.obs_names)
    var_sel = subset_func2(adata.var_names)
    view = adata[obs_sel, var_sel]

    reference = view.copy()
    value = getattr(reference, attr)
    setattr(view, attr, value)

    assert_equal(reference, view, exact=True)
def test_view_of_view(matrix_type, subset_func, subset_func2):
    """Check that chained views reference the root object and match chained copies.

    Views of views must keep ``_adata_ref`` pointing at the original AnnData,
    and copying once at the end of a chain of views must give the same
    contents as copying after every subsetting step.
    """
    adata = gen_adata((30, 15), X_type=matrix_type)
    if subset_func is single_subset:
        pytest.xfail("Other subset generating functions have trouble with this")

    # Two nested selections along the var axis.
    var_s1 = subset_func(adata.var_names, min_size=4)
    var_view1 = adata[:, var_s1]
    var_s2 = subset_func2(var_view1.var_names)
    var_view2 = var_view1[:, var_s2]
    assert var_view2._adata_ref is adata

    # Two nested selections along the obs axis.
    obs_s1 = subset_func(adata.obs_names, min_size=4)
    obs_view1 = adata[obs_s1, :]
    obs_s2 = subset_func2(obs_view1.obs_names)
    assert adata[obs_s1, :][:, var_s1][obs_s2, :]._adata_ref is adata

    from_copies = adata[:, var_s1].copy()[obs_s1, :].copy()[:, var_s2].copy()
    from_views = adata[:, var_s1][obs_s1, :][:, var_s2].copy()

    # Check equivalence of both construction routes, attribute by attribute.
    assert np.allclose(asarray(from_copies.X), asarray(from_views.X))
    assert not np.any(asarray(ne(from_copies.obs, from_views.obs)))
    assert not np.any(asarray(ne(from_copies.var, from_views.var)))

    for key in adata.obsm.keys():
        mismatch = ne(from_copies.obsm[key], from_views.obsm[key])
        assert not np.any(asarray(mismatch))

    for key in adata.varm.keys():
        mismatch = ne(asarray(from_copies.varm[key]), asarray(from_views.varm[key]))
        assert not np.any(asarray(mismatch))

    for key in adata.layers.keys():
        mismatch = ne(
            asarray(from_copies.layers[key]), asarray(from_views.layers[key])
        )
        assert not np.any(asarray(mismatch))
def test_backed_indexing(ondisk_equivalent_adata, subset_func, subset_func2):
    """Check that indexing on-disk objects yields the same ``X`` as in memory.

    The CSR and CSC on-disk objects are indexed on both axes at once; the
    dense on-disk object is indexed along one axis at a time.
    """
    csr_mem, csr_disk, csc_disk, dense_disk = ondisk_equivalent_adata

    obs_idx = subset_func(csr_mem.obs_names)
    var_idx = subset_func2(csr_mem.var_names)

    # Sparse on-disk objects: both axes subset at once.
    for sparse_disk in (csr_disk, csc_disk):
        assert_equal(
            csr_mem[obs_idx, var_idx].X, sparse_disk[obs_idx, var_idx].X
        )

    # Dense on-disk object: one axis at a time.
    assert_equal(csr_mem[obs_idx, :].X, dense_disk[obs_idx, :].X)
    assert_equal(csr_mem[:, var_idx].X, dense_disk[:, var_idx].X)
def test_double_index(subset_func, subset_func2):
    """Check that indexing both axes at once equals indexing them in sequence."""
    adata = gen_adata((10, 10))
    obs_subset = subset_func(adata.obs_names)
    var_subset = subset_func2(adata.var_names)

    at_once = adata[obs_subset, var_subset]
    obs_view = adata[obs_subset, :]
    in_sequence = obs_view[:, var_subset]

    same_X = asarray(at_once.X) == asarray(in_sequence.X)
    assert np.all(same_X)
    assert np.all(at_once.obs == in_sequence.obs)
    assert np.all(at_once.var == in_sequence.var)
def test_inplace_subset_no_X(subset_func, dim):
    """Check in-place subsetting along ``dim`` on an object whose ``X`` was deleted.

    The result of ``_inplace_subset_<dim>`` must exactly equal a copy of the
    corresponding view produced by ``subset_dim``.
    """
    original = gen_adata((30, 30))
    del original.X

    names = getattr(original, f"{dim}_names")
    subset_idx = subset_func(names)

    inplace = original.copy()
    expected = subset_dim(original, **{dim: subset_idx}).copy()

    # Look up the axis-specific in-place method by name, then apply it.
    inplace_subset = getattr(inplace, f"_inplace_subset_{dim}")
    inplace_subset(subset_idx)

    assert_equal(inplace, expected, exact=True)
def test_view_of_view(matrix_type, subset_func, subset_func2):
    """Check chained views on an object with ``.raw`` set.

    Views of views must keep ``_adata_ref`` pointing at the original AnnData,
    and a single final copy of a chain of views must exactly equal the result
    of copying after each subsetting step.
    """
    adata = gen_adata((30, 15), X_type=matrix_type)
    adata.raw = adata
    if subset_func is single_subset:
        pytest.xfail("Other subset generating functions have trouble with this")

    # Two nested selections along the var axis.
    var_s1 = subset_func(adata.var_names, min_size=4)
    var_view1 = adata[:, var_s1]
    var_s2 = subset_func2(var_view1.var_names)
    var_view2 = var_view1[:, var_s2]
    assert var_view2._adata_ref is adata

    # Two nested selections along the obs axis.
    obs_s1 = subset_func(adata.obs_names, min_size=4)
    obs_view1 = adata[obs_s1, :]
    obs_s2 = subset_func2(obs_view1.obs_names)
    assert adata[obs_s1, :][:, var_s1][obs_s2, :]._adata_ref is adata

    from_copies = adata[:, var_s1].copy()[obs_s1, :].copy()[:, var_s2].copy()
    from_views = adata[:, var_s1][obs_s1, :][:, var_s2].copy()

    assert_equal(from_copies, from_views, exact=True)
def test_set_var(adata, subset_func):
    """Check that assigning ``.var`` on a view detaches it from the parent.

    After the assignment the subset must not be a view, must carry the new
    frame, and the parent's hash must be unchanged.
    """
    parent_hash = joblib.hash(adata)

    var_sel = subset_func(adata.var_names)
    subset = adata[:, var_sel]

    columns = {"a": np.ones(subset.n_vars), "b": np.ones(subset.n_vars)}
    new_var = pd.DataFrame(columns, index=subset.var_names)

    assert subset.is_view
    subset.var = new_var
    assert not subset.is_view

    assert np.all(subset.var == new_var)
    assert joblib.hash(adata) == parent_hash
def test_set_obs(adata, subset_func):
    """Check that assigning ``.obs`` on a view detaches it from the parent.

    After the assignment the subset must not be a view, must carry the new
    frame, and the parent's hash must be unchanged.
    """
    parent_hash = joblib.hash(adata)

    obs_sel = subset_func(adata.obs_names)
    subset = adata[obs_sel, :]

    columns = {"a": np.ones(subset.n_obs), "b": np.ones(subset.n_obs)}
    new_obs = pd.DataFrame(columns, index=subset.obs_names)

    assert subset.is_view
    subset.obs = new_obs
    assert not subset.is_view

    assert np.all(subset.obs == new_obs)
    assert joblib.hash(adata) == parent_hash
def test_set_subset_varm(adata, subset_func):
    """Check that writing into ``varm`` of a view does not touch the parent.

    Setting elements of ``subset.varm["o"]`` must turn the subset into a
    non-view while the parent's ``varm["o"]`` and hash stay unchanged.
    """
    parent_hash = joblib.hash(adata)
    varm_before = adata.varm["o"].copy()

    # Redraw the slice until the view spans more than two variables.
    subset_idx = slice_subset(adata.var_names)
    while adata[:, subset_idx].shape[1] <= 2:
        subset_idx = slice_subset(adata.var_names)

    subset = adata[:, subset_idx]
    n_rows = subset.varm["o"].shape[0]
    internal_idx = subset_func(np.arange(n_rows))

    assert subset.is_view
    subset.varm["o"][internal_idx] = 1
    assert not subset.is_view

    assert np.all(adata.varm["o"] == varm_before)
    assert parent_hash == joblib.hash(adata)
def test_set_subset_obsm(adata, subset_func):
    """Check that writing into ``obsm`` of a view does not touch the parent.

    Setting elements of ``subset.obsm["o"]`` must turn the subset into a
    non-view while the parent's ``obsm["o"]`` and hash stay unchanged.
    """
    parent_hash = joblib.hash(adata)
    obsm_before = adata.obsm["o"].copy()

    # Redraw the slice until the view spans more than two observations.
    subset_idx = slice_subset(adata.obs_names)
    while len(adata[subset_idx, :]) <= 2:
        subset_idx = slice_subset(adata.obs_names)

    subset = adata[subset_idx, :]
    n_rows = subset.obsm["o"].shape[0]
    internal_idx = subset_func(np.arange(n_rows))

    assert subset.is_view
    subset.obsm["o"][internal_idx] = 1
    assert not subset.is_view

    assert np.all(adata.obsm["o"] == obsm_before)
    assert parent_hash == joblib.hash(adata)
def test_not_set_subset_X(matrix_type, subset_func):
    """Check that writing into ``X`` of a var-axis view does not touch the parent.

    A view over a slice of variables is created, some of its columns are set
    to 1, and the test asserts that the subset stopped being a view while the
    parent's ``X`` and hash are unchanged.
    """
    adata = ad.AnnData(matrix_type(asarray(sparse.random(20, 20))))
    init_hash = joblib.hash(adata)
    orig_X_val = adata.X.copy()

    # Select and validate on the var axis, the axis the view is actually taken
    # on, and redraw until the view spans more than two variables. The previous
    # obs-axis view assigned to ``subset`` was overwritten before use, so it
    # has been dropped.
    while True:
        subset_idx = slice_subset(adata.var_names)
        if adata[:, subset_idx].shape[1] > 2:
            break
    subset = adata[:, subset_idx]

    internal_idx = subset_func(np.arange(subset.X.shape[1]))
    assert subset.is_view
    subset.X[:, internal_idx] = 1
    assert not subset.is_view

    # The parent's data must be untouched by the write into the view.
    assert not np.any(asarray(adata.X != orig_X_val))
    assert init_hash == joblib.hash(adata)
def test_backed_raw_subset(tmp_path, subset_func, subset_func2):
    """Compare a view of a backed AnnData (with ``.raw``) to its in-memory twin.

    The same obs/var selection is applied to an in-memory object and to the
    object read back in backed mode; the backed view is then written to a
    second file, re-read, and compared against a copy of the in-memory view.
    """
    backed_pth = tmp_path / "backed.h5ad"
    final_pth = tmp_path / "final.h5ad"

    mem_adata = gen_adata((10, 10))
    mem_adata.raw = mem_adata
    obs_idx = subset_func(mem_adata.obs_names)
    var_idx = subset_func2(mem_adata.var_names)
    mem_adata.write(backed_pth)

    backed_adata = ad.read_h5ad(backed_pth, backed="r")
    backed_v = backed_adata[obs_idx, var_idx]
    # Use ``is_view``, the attribute name used by the other tests in this file,
    # instead of ``isview``.
    assert backed_v.is_view

    mem_v = mem_adata[obs_idx, var_idx]
    assert_equal(backed_v, mem_v)

    backed_v.write_h5ad(final_pth)
    final_adata = ad.read_h5ad(final_pth)
    # TODO: Figure out why this doesn't work if I don't copy
    assert_equal(final_adata, mem_v.copy())
def test_inplace_subset_var(matrix_type, subset_func):
    """Check ``_inplace_subset_var`` against copying a var-subset view.

    ``X``, ``obs``, ``var`` and every ``obsm``/``varm``/``layers`` entry of the
    in-place result must equal those of the copied view; ``obsm`` must
    additionally still equal the entries of the unsubsetted original.
    """
    original = gen_adata((30, 30), X_type=matrix_type)
    var_sel = subset_func(original.var_names)

    inplace = original.copy()
    expected = original[:, var_sel].copy()
    inplace._inplace_subset_var(var_sel)

    assert_array_equal(asarray(expected.X), asarray(inplace.X))
    assert_array_equal(expected.obs, inplace.obs)
    assert_array_equal(expected.var, inplace.var)

    for key in expected.obsm:
        assert_array_equal(asarray(expected.obsm[key]), asarray(inplace.obsm[key]))
        # obsm is also compared against the unsubsetted original.
        assert_array_equal(asarray(original.obsm[key]), asarray(inplace.obsm[key]))

    for key in expected.varm:
        assert_array_equal(asarray(expected.varm[key]), asarray(inplace.varm[key]))

    for key in expected.layers:
        assert_array_equal(
            asarray(expected.layers[key]), asarray(inplace.layers[key])
        )
def test_backed_raw_subset(tmp_path, array_type, subset_func, subset_func2):
    """Compare a view of a backed AnnData (with ``.raw``) to its in-memory twin.

    The test is marked as an expected failure when ``array_type`` is
    ``asarray`` and both indexers are arrays. Otherwise it checks value
    equality of the two views, exact equality after materialising both, that
    the backed view is still a backed view afterwards, and that writing the
    backed view to a new file round-trips.
    """
    backed_file = tmp_path / "backed.h5ad"
    final_file = tmp_path / "final.h5ad"

    in_memory = gen_adata((10, 10), X_type=array_type)
    in_memory.raw = in_memory

    obs_idx = subset_func(in_memory.obs_names)
    var_idx = subset_func2(in_memory.var_names)

    array_like = (np.ndarray, sparse.spmatrix)
    both_arrays = isinstance(obs_idx, array_like) and isinstance(var_idx, array_like)
    if array_type is asarray and both_arrays:
        pytest.xfail(
            "Fancy indexing does not work with multiple arrays on a h5py.Dataset"
        )

    in_memory.write(backed_file)

    # --- Backed view has the same values as the in-memory view ---
    on_disk = ad.read_h5ad(backed_file, backed="r")
    backed_v = on_disk[obs_idx, var_idx]
    assert backed_v.is_view
    mem_v = in_memory[obs_idx, var_idx]

    # Value equivalent
    assert_equal(mem_v, backed_v)
    # Type and value equivalent
    assert_equal(mem_v.copy(), backed_v.to_memory(), exact=True)
    assert backed_v.is_view
    assert backed_v.isbacked

    # --- Write from the backed view ---
    backed_v.write_h5ad(final_file)
    final_adata = ad.read_h5ad(final_file)

    assert_equal(mem_v, final_adata)
    # Check loading the backed view into memory.
    assert_equal(final_adata, backed_v.to_memory())