def read_adata(self, path):
    """Read an AnnData object from *path*, dispatching on file extension.

    Supports .loom, .zarr, .tsv (STAR-Fusion), .rds (Seurat object,
    converted to .h5ad via an R script and cached next to the .rds), and
    anything ``anndata.read`` understands (e.g. .h5ad).
    """
    path_lc = path.lower()
    if path_lc.endswith('.loom'):
        return anndata.read_loom(path)
    elif path_lc.endswith('.zarr'):
        return anndata.read_zarr(path)
    elif path_lc.endswith('.tsv'):
        return read_star_fusion_file(path)
    elif path_lc.endswith('.rds'):  # Seurat, convert to h5ad
        h5_file = path + '.h5ad'
        import os
        # Re-convert when the cached .h5ad is missing or its mtime no longer
        # matches the .rds; copystat below syncs the timestamps so this check
        # passes on subsequent calls.
        if not os.path.exists(h5_file) or abs(os.path.getmtime(h5_file) - os.path.getmtime(path)) > 0.00001:
            import subprocess
            import pkg_resources
            import shutil
            print('Converting Seurat object')
            if os.path.exists(h5_file):
                os.remove(h5_file)
            subprocess.check_call(
                ['Rscript', pkg_resources.resource_filename("cirrocumulus", 'seurat2h5ad.R'), path, h5_file])
            shutil.copystat(path, h5_file)
        adata = anndata.read(h5_file, backed=self.backed)
        # Prefer raw when its cell dimension matches the filtered matrix.
        if adata.raw is not None and adata.shape[0] == adata.raw.shape[0]:
            print('Using adata.raw')
            adata = anndata.AnnData(X=adata.raw.X, var=adata.raw.var,
                                    obs=adata.obs, obsm=adata.obsm,
                                    uns=adata.uns)
        return adata
    return anndata.read(path, backed=self.backed)
def main():
    """Merge per-barcode AnnData files into one .h5ad with a layer per label.

    For each label (e.g. tn5/tnH) the counts of all files whose name contains
    one of that label's barcodes are summed (cells aligned on obs_names); the
    result is written as ``<sample_name>.h5ad`` with one layer per label and
    X taken from the ``options.Xdata`` label.
    """
    options = get_options()
    # Default barcode -> label mapping.
    barcodes = {
        'CGTACTAG': 'tn5',
        'TCCTGAGC': 'tn5',
        'TCATGAGC': 'tn5',
        'CCTGAGAT': 'tn5',
        'TAAGGCGA': 'tnH',
        'GCTACGCT': 'tnH',
        'AGGCTCCG': 'tnH',
        'CTGCGCAT': 'tnH'
    }
    if options.barcodes:
        # Override defaults from a two-column file: <barcode> <label>.
        barcodes = {}
        for line in open(options.barcodes):
            t = line.split()
            barcodes[t[0]] = t[1]
    ad_l = dict.fromkeys(barcodes.values())
    for l in ad_l:
        # All barcodes belonging to this label, then all matching input files.
        bc_l = [x for x in barcodes if barcodes[x] == l]
        layer_files = [y for x in bc_l for y in options.input_files if x in y]
        ad_tmp = ad.read(layer_files[0])
        for f in layer_files[1:]:
            _X = ad.read(f)
            # Align cells to the first file's order before summing counts.
            _X = _X[ad_tmp.obs_names]
            ad_tmp.X = ad_tmp.X + _X.X
        ad_l[l] = ad_tmp.copy()
    adata = ad_l[options.Xdata].copy()
    for l in ad_l:
        adata.layers[l] = ad_l[l].X
    adata.write(f'{options.sample_name}.h5ad')
def _load_saved_gimvi_files(
    dir_path: str,
    load_seq_adata: bool,
    load_spatial_adata: bool,
    prefix: Optional[str] = None,
    map_location: Optional[Literal["cpu", "cuda"]] = None,
) -> Tuple[dict, dict, np.ndarray, np.ndarray, dict, AnnData, AnnData]:
    """Load saved GIMVI model files (and, optionally, its AnnData objects).

    Raises ValueError when a requested AnnData file is absent from dir_path.
    """
    file_name_prefix = prefix or ""

    def _read_required(file_name):
        # Read a saved AnnData, failing loudly when the file is missing.
        full_path = os.path.join(dir_path, file_name)
        if not os.path.exists(full_path):
            raise ValueError(
                "Save path contains no saved anndata and no adata was passed.")
        return read(full_path)

    adata_seq = None
    adata_spatial = None
    if load_seq_adata:
        adata_seq = _read_required(f"{file_name_prefix}adata_seq.h5ad")
    if load_spatial_adata:
        adata_spatial = _read_required(f"{file_name_prefix}adata_spatial.h5ad")

    use_legacy = _should_use_legacy_saved_gimvi_files(dir_path, file_name_prefix)
    # TODO(jhong): Remove once legacy load is deprecated.
    if use_legacy:
        (
            model_state_dict,
            seq_var_names,
            spatial_var_names,
            attr_dict,
        ) = _load_legacy_saved_gimvi_files(dir_path, file_name_prefix, map_location)
    else:
        saved = torch.load(
            os.path.join(dir_path, f"{file_name_prefix}model.pt"),
            map_location=map_location,
        )
        model_state_dict = saved["model_state_dict"]
        seq_var_names = saved["seq_var_names"]
        spatial_var_names = saved["spatial_var_names"]
        attr_dict = saved["attr_dict"]

    return (
        attr_dict,
        seq_var_names,
        spatial_var_names,
        model_state_dict,
        adata_seq,
        adata_spatial,
    )
def test_read_write_X(tmp_path, mtx_format, backed_mode, force_dense):
    """Round-trip X through a backed file written with as_dense=['X']."""
    root = Path(tmp_path)
    src_path = root / "orig.h5ad"
    dense_path = root / "backed.h5ad"

    src = ad.AnnData(mtx_format(asarray(sparse.random(10, 10, format="csr"))))
    src.write(src_path)

    backed = ad.read(src_path, backed=backed_mode)
    backed.write(dense_path, as_dense=["X"])
    backed.file.close()

    roundtripped = ad.read(dense_path)
    assert np.all(asarray(src.X) == asarray(roundtripped.X))
def test_readwrite_maintain_X_dtype(typ, backing_h5ad):
    """Writing then reading an AnnData must preserve the dtype of X."""
    src = ad.AnnData(typ(X_list), dtype="int8")
    src.write(backing_h5ad)

    roundtripped = ad.read(backing_h5ad)
    assert roundtripped.X.dtype == src.X.dtype
def test_readwrite_h5ad_one_dimensino(typ, backing_h5ad):
    """A single-column slice survives a write/read round trip.

    NOTE(review): "dimensino" is a typo for "dimension"; the name is kept
    unchanged so existing test selections keep working.
    """
    src = ad.AnnData(typ(X_list), obs=obs_dict, var=var_dict, uns=uns_dict)
    one_col = src[:, 0].copy()
    one_col.write(backing_h5ad)

    roundtripped = ad.read(backing_h5ad)
    assert roundtripped.shape == (3, 1)
def app_conf(request, tmpdir_factory):
    """Pytest fixture yielding ``(test_client, dataset_id)``.

    When ``request.param`` is falsy the app serves the .h5ad file directly
    (no database). Otherwise the app runs in CIRRO_TEST mode against a local
    MongoDB: the dataset is converted to zarr and registered via the REST
    API, and the returned id comes from the POST response.
    """
    dataset_path = "test-data/pbmc3k_no_raw.h5ad"
    if not request.param:
        app = create_app()
        configure_app(app, [dataset_path], None, None)
        dataset_id = dataset_path
        os.environ[CIRRO_TEST] = "false"
    else:
        os.environ[CIRRO_TEST] = "true"
        # Non-default port 27018 — assumes a test MongoDB instance is running.
        os.environ[CIRRO_DB_URI] = "mongodb://localhost:27018/cirrocumulus-test"
        app = cached_app()
    with app.test_client() as client:
        if request.param:
            # insert dataset
            output_dir = str(tmpdir_factory.mktemp("data").join("test.zarr"))
            PrepareData(
                datasets=[anndata.read(dataset_path)],
                output=output_dir,
                output_format="zarr",
                no_auto_groups=True,
            ).execute()
            r = client.post("/api/dataset", data=dict(url=output_dir, name="test"))
            dataset_id = r.json["id"]
        yield client, dataset_id
def test_readwrite_sparse_as_dense(backing_h5ad):
    """X written with force_dense must come back sparse and value-equal."""
    src = ad.AnnData(X_sp)
    src.write(backing_h5ad, force_dense=True)

    roundtripped = ad.read(backing_h5ad, chunk_size=2)
    assert issparse(roundtripped.X)
    assert np.allclose(X_sp.toarray(), roundtripped.X.toarray())
def preprocess(anndatafile):
    """Load an .h5ad file and compute a 50-D PCA embedding plus color labels.

    Returns ``(X, stage, alt_colors)`` where X is the (cells x 50) PCA
    embedding, stage maps obs["TimeID"] through lbl_map, and alt_colors maps
    obs["TissueName"] through tissue_map.
    """
    ann = anndata.read(anndatafile)
    counts = ann.X
    genes = ann.var.index.astype("str")
    # NOTE(review): `cells` is computed but never used below.
    cells = ann.obs["unique_cell_id"].values.astype("str")
    # Select ~1000 informative genes (Kobak & Berens-style selection).
    important_genes = rnaseqTools.geneSelection(
        counts,
        n=1000,
        decay=1.5,
        genes=genes,
        plot=False,
    )
    # Library-size normalize to the median depth, then log2-transform.
    librarySizes = np.sum(counts, axis=1)
    median = np.median(np.asarray(librarySizes).squeeze())
    X = np.log2(counts[:, important_genes] / librarySizes * median + 1)
    X = np.array(X)
    X = X - X.mean(axis=0)
    # PCA via SVD of the centered matrix; flip the sign of components whose
    # V row sums negative so the embedding is deterministic.
    U, s, V = np.linalg.svd(X, full_matrices=False)
    U[:, np.sum(V, axis=1) < 0] *= -1
    X = np.dot(U, np.diag(s))
    # Keep the top 50 components ordered by decreasing singular value.
    X = X[:, np.argsort(s)[::-1]][:, :50]
    # map the group assignments to a color
    stage = ann.obs["TimeID"].map(lambda x: lbl_map[x]).values.astype("str")
    alt_colors = ann.obs["TissueName"].map(
        lambda x: tissue_map[x]).values.astype("str")
    return X, stage, alt_colors
def _load_saved_files(
    dir_path: str,
    load_adata: bool,
    prefix: Optional[str] = None,
    map_location: Optional[Literal["cpu", "cuda"]] = None,
) -> Tuple[dict, np.ndarray, dict, AnnData]:
    """Helper to load saved files."""
    file_name_prefix = prefix or ""

    adata = None
    if load_adata:
        adata_path = os.path.join(dir_path, f"{file_name_prefix}adata.h5ad")
        if not os.path.exists(adata_path):
            raise ValueError("Save path contains no saved anndata and no adata was passed.")
        adata = read(adata_path)

    use_legacy = _should_use_legacy_saved_files(dir_path, file_name_prefix)
    # TODO(jhong): Remove once legacy load is deprecated.
    if use_legacy:
        model_state_dict, var_names, attr_dict = _load_legacy_saved_files(
            dir_path, file_name_prefix, map_location
        )
    else:
        saved = torch.load(
            os.path.join(dir_path, f"{file_name_prefix}model.pt"),
            map_location=map_location,
        )
        model_state_dict = saved["model_state_dict"]
        var_names = saved["var_names"]
        attr_dict = saved["attr_dict"]

    return attr_dict, var_names, model_state_dict, adata
def _load_purified_pbmc_dataset(
    save_path: str = "data/",
    subset_datasets: List[str] = None,
) -> anndata.AnnData:
    """Download (if needed) and load the purified PBMC dataset.

    Parameters
    ----------
    save_path
        Directory the .h5ad file is downloaded to / read from.
    subset_datasets
        Optional list of cell-type dataset names; when given, only cells whose
        ``obs["cell_types"]`` matches one of them are kept, in the given order.

    Raises
    ------
    ValueError
        If a requested subset name is not a known dataset name.
    """
    url = "https://github.com/YosefLab/scVI-data/raw/master/PurifiedPBMCDataset.h5ad"
    save_fn = "PurifiedPBMCDataset.h5ad"
    _download(url, save_path, save_fn)
    path_to_file = os.path.join(save_path, save_fn)
    adata = anndata.read(path_to_file)
    # Known dataset names. Fix: the original list contained "cd4_t_helper"
    # twice; a set removes the redundancy and gives O(1) membership tests.
    dataset_names = {
        "cd4_t_helper",
        "regulatory_t",
        "naive_t",
        "memory_t",
        "cytotoxic_t",
        "naive_cytotoxic",
        "b_cells",
        "cd34",
        "cd56_nk",
        "cd14_monocytes",
    }
    if subset_datasets is not None:
        row_indices = []
        for dataset in subset_datasets:
            # Explicit error instead of a bare assert (asserts vanish under -O).
            if dataset not in dataset_names:
                raise ValueError(f"Unknown dataset name: {dataset!r}")
            row_indices.append(np.where(adata.obs["cell_types"] == dataset)[0])
        adata = adata[np.concatenate(row_indices)].copy()
    return adata
def read_adata(path, spatial_directory=None, use_raw=False):
    """Read an AnnData file (.loom/.zarr/other), optionally attaching spatial
    data, swapping in adata.raw, and converting known obs fields to categorical."""
    lower = path.lower()
    if lower.endswith('.loom'):
        adata = anndata.read_loom(path)
    elif lower.endswith('.zarr'):
        adata = anndata.read_zarr(path)
    else:
        adata = anndata.read(path)

    if 'module' in adata.uns:
        module = adata.uns['module']
        adata.uns[ADATA_MODULE_UNS_KEY] = anndata.AnnData(
            X=module['X'], var=module['var'])

    raw_usable = (use_raw and adata.raw is not None
                  and adata.shape[0] == adata.raw.shape[0])
    if raw_usable:
        logger.info('Using adata.raw')
        adata = anndata.AnnData(X=adata.raw.X, var=adata.raw.var,
                                obs=adata.obs, obsm=adata.obsm, uns=adata.uns)

    if spatial_directory is not None and not add_spatial(adata, spatial_directory):
        logger.info('No spatial data found in {}'.format(spatial_directory))

    for field in categorical_fields_convert:
        if field not in adata.obs:
            continue
        if pd.api.types.is_categorical_dtype(adata.obs[field]):
            continue
        logger.info('Converting {} to categorical'.format(field))
        adata.obs[field] = adata.obs[field].astype(str).astype('category')
    return adata
def __init__(self):
    """Load the Macosko Drop-seq control data shipped next to this module."""
    data_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "macosko_dropseq_control.h5ad",
    )
    super(MacosDataset, self).__init__(
        anndata.read(data_path), select_genes_keywords=["ercc"])
def __init__(self):
    """Load the Klein inDrops control (GSM1599501) shipped next to this module."""
    data_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "klein_indrops_control_GSM1599501.h5ad",
    )
    super(KleinDataset, self).__init__(
        anndata.read(data_path), select_genes_keywords=["ercc"])
def __init__(self, n_genes=100):
    """Load the Svensson Chromium control data and keep sample "20312"."""
    data_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "svensson_chromium_control.h5ad",
    )
    full = anndata.read(data_path)
    sample2 = full[full.obs.query('sample == "20312"').index]
    super(Sven2DatasetRNA, self).__init__(sample2, n_genes=n_genes)
def __init__(self):
    """Load the Zheng GemCode control data shipped next to this module."""
    data_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "zheng_gemcode_control.h5ad",
    )
    super(ZhengDataset, self).__init__(
        anndata.read(data_path), select_genes_keywords=["ercc"])
def __init__(self, data, select_genes_keywords=()):
    """Build a dataset from an AnnData object or an .h5ad path.

    Parameters
    ----------
    data
        AnnData object, or a path readable by ``anndata.read``.
    select_genes_keywords
        Keep only genes whose name contains every keyword (case-insensitive).
        Fix: the default was a mutable ``[]``; an immutable empty tuple avoids
        the shared-mutable-default pitfall while iterating identically.
    """
    super().__init__()
    anndataset = anndata.read(data) if isinstance(data, str) else data
    # Start from all genes, then successively narrow by each keyword.
    idx_and_gene_names = list(enumerate(anndataset.var.index))
    for keyword in select_genes_keywords:
        kw = keyword.lower()
        idx_and_gene_names = [(idx, gene_name)
                              for idx, gene_name in idx_and_gene_names
                              if kw in gene_name.lower()]
    gene_indices = np.array([idx for idx, _ in idx_and_gene_names])
    gene_names = np.array([gene_name for _, gene_name in idx_and_gene_names])
    expression_mat = np.array(anndataset.X[:, gene_indices].todense())
    # Drop cells with zero total counts over the selected genes.
    select_cells = expression_mat.sum(axis=1) > 0
    expression_mat = expression_mat[select_cells, :]
    # Keep genes detected in more than 21% of the remaining cells.
    select_genes = (expression_mat > 0).mean(axis=0) > 0.21
    gene_names = gene_names[select_genes]
    expression_mat = expression_mat[:, select_genes]
    print("Final dataset shape :", expression_mat.shape)
    self.populate_from_data(X=expression_mat, gene_names=gene_names)
def make_partial_results_filenames(wildcards):
    """Snakemake input function: expand partial-result CSV names for a sample.

    Cell sample sizes start at 500 and grow by sqrt(2) until reaching 85% of
    the stacked dataset's cells; depths are the stacked file's layer names.

    Fix: the original debug prints referenced undefined module globals
    ``dataset_project_id`` / ``dataset_sample_id`` (NameError at runtime);
    the wildcards object is used instead and debug noise is trimmed.
    """
    stacked_h5ad = (f'stacked_h5ads/{wildcards.dataset_project_id}-'
                    f'{wildcards.dataset_sample_id}-stacked.h5ad')
    adata = anndata.read(stacked_h5ad)
    total_cells = adata.n_obs
    n_retained_cells = int(0.85 * total_cells)
    print('Total cells:', total_cells)
    print('Retained cells:', n_retained_cells)

    cells_sizes = []
    sampling_size = 500  # initial number of sampled cells
    while sampling_size < n_retained_cells:
        cells_sizes.append(sampling_size)
        sampling_size = int(sampling_size * np.sqrt(2))

    ss_depths = list(adata.layers.keys())
    print(wildcards.dataset_project_id, wildcards.dataset_sample_id)
    print('cell sizes:', cells_sizes)
    print('depths:', ss_depths)

    return expand(
        'scvi_output/partial_csvs/{{dataset_project_id}}-{{dataset_sample_id}}-c{ss_cells}-d{ss_depth}-SUCCESS.csv',
        ss_cells=cells_sizes,
        ss_depth=ss_depths
    )
def test_raw(backing_h5ad):
    """adata.raw keeps the full var space across slicing and a write/read cycle."""
    adata = ad.AnnData(np.array(X_list), obs=obs_dict, var=var_dict,
                       uns=uns_dict, dtype='int32')
    adata.raw = adata  # snapshot before slicing
    assert adata.raw[:, 0].X.tolist() == [1, 4, 7]

    adata = adata[:, [0, 1]]
    assert adata.var_names.tolist() == ['var1', 'var2']
    assert adata.raw.var_names.tolist() == ['var1', 'var2', 'var3']

    # round trip through the backing file
    adata.write(backing_h5ad)
    adata = ad.read(backing_h5ad)
    assert adata.raw[:, 0].X.tolist() == [1, 4, 7]
    assert adata.raw.var_names.tolist() == ['var1', 'var2', 'var3']
    assert adata.var_names.tolist() == ['var1', 'var2']
def test_raw(backing_h5ad):
    """Raw survives slicing and round-tripping; writing a view must warn."""
    adata = ad.AnnData(np.array(X_list), obs=obs_dict, var=var_dict,
                       uns=uns_dict, dtype='int32')
    adata.raw = adata  # snapshot before slicing
    assert adata.raw[:, 0].X.tolist() == [[1], [4], [7]]

    adata = adata[:, [0, 1]]
    assert adata.var_names.tolist() == ['var1', 'var2']
    assert adata.raw.var_names.tolist() == ['var1', 'var2', 'var3']

    # Writing a view materializes it first, which emits a warning.
    with pytest.warns(ImplicitModificationWarning, match="Initializing view as actual"):
        # TODO: don’t modify adata just to write it
        adata.write(backing_h5ad)
    adata = ad.read(backing_h5ad)
    assert adata.raw[:, 0].X.tolist() == [[1], [4], [7]]
    assert adata.raw.var_names.tolist() == ['var1', 'var2', 'var3']
    assert adata.var_names.tolist() == ['var1', 'var2']
def test_readwrite_h5ad(typ, dataset_kwargs, backing_h5ad):
    """Write/read/re-write round trip preserves dtypes, categories and raw.

    Fix: ``tempfile.TemporaryDirectory()`` was created without a context
    manager or ``cleanup()`` call, leaking the intermediate directory on every
    run (and especially on assertion failure); it is now a ``with`` block.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        mid_pth = Path(tmpdir) / "mid.h5ad"
        X = typ(X_list)
        adata_src = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict)
        assert not is_categorical_dtype(adata_src.obs["oanno1"])
        adata_src.raw = adata_src

        adata_src.write(backing_h5ad, **dataset_kwargs)
        adata_mid = ad.read(backing_h5ad)
        adata_mid.write(mid_pth, **dataset_kwargs)
        adata = ad.read_h5ad(mid_pth)
    # Everything below only touches in-memory objects, so the temporary
    # directory can be cleaned up first.
    assert is_categorical_dtype(adata.obs["oanno1"])
    assert not is_categorical_dtype(adata.obs["oanno2"])
    assert adata.obs.index.tolist() == ["name1", "name2", "name3"]
    assert adata.obs["oanno1"].cat.categories.tolist() == ["cat1", "cat2"]
    assert is_categorical_dtype(adata.raw.var["vanno2"])
    assert np.all(adata.obs == adata_src.obs)
    assert np.all(adata.var == adata_src.var)
    assert np.all(adata.var.index == adata_src.var.index)
    assert adata.var.index.dtype == adata_src.var.index.dtype
    assert type(adata.raw.X) is type(adata_src.raw.X)
    assert type(adata.raw.varm) is type(adata_src.raw.varm)
    assert np.allclose(asarray(adata.raw.X), asarray(adata_src.raw.X))
    assert np.all(adata.raw.var == adata_src.raw.var)
    assert isinstance(adata.uns["uns4"]["a"], (int, np.integer))
    assert isinstance(adata_src.uns["uns4"]["a"], (int, np.integer))
    assert type(adata.uns["uns4"]["c"]) is type(adata_src.uns["uns4"]["c"])
    assert_equal(adata, adata_src)
def _load_saved_files(
    dir_path: str,
    load_adata: bool,
    map_location: Optional[Literal["cpu", "cuda"]] = None,
):
    """Helper to load saved files."""
    adata_path = os.path.join(dir_path, "adata.h5ad")
    adata = None
    if load_adata:
        if not os.path.exists(adata_path):
            raise ValueError(
                "Save path contains no saved anndata and no adata was passed.")
        adata = read(adata_path)

    var_names = np.genfromtxt(
        os.path.join(dir_path, "var_names.csv"), delimiter=",", dtype=str)

    # NOTE: pickle is only safe on trusted model directories.
    with open(os.path.join(dir_path, "attr.pkl"), "rb") as handle:
        attr_dict = pickle.load(handle)
    scvi_setup_dict = attr_dict.pop("scvi_setup_dict_")

    model_state_dict = torch.load(
        os.path.join(dir_path, "model_params.pt"), map_location=map_location)

    return scvi_setup_dict, attr_dict, var_names, model_state_dict, adata
def read_adata(path, spatial_directory=None, use_raw=False):
    """Read an AnnData file, with optional spatial data and raw handling.

    Dispatches on extension (.loom/.zarr/anything ``anndata.read`` handles),
    rehydrates the gene-module AnnData stored in ``uns['module']``, swaps in
    ``adata.raw`` when requested and shape-compatible, attaches spatial data
    from *spatial_directory* if found, and converts known obs fields to
    categorical dtype.
    """
    if path.lower().endswith(".loom"):
        adata = anndata.read_loom(path)
    elif path.lower().endswith(".zarr"):
        adata = anndata.read_zarr(path)
    else:
        adata = anndata.read(path)
    if "module" in adata.uns:
        adata.uns[ADATA_MODULE_UNS_KEY] = anndata.AnnData(
            X=adata.uns["module"]["X"], var=adata.uns["module"]["var"]
        )
    # Only use raw when its cell dimension matches the filtered matrix.
    if use_raw and adata.raw is not None and adata.shape[0] == adata.raw.shape[0]:
        logger.info("Using adata.raw")
        adata = anndata.AnnData(
            X=adata.raw.X, var=adata.raw.var, obs=adata.obs, obsm=adata.obsm, uns=adata.uns
        )
    if spatial_directory is not None:
        if not add_spatial(adata, spatial_directory):
            logger.info("No spatial data found in {}".format(spatial_directory))
    for field in categorical_fields_convert:
        if field in adata.obs and not pd.api.types.is_categorical_dtype(adata.obs[field]):
            logger.info("Converting {} to categorical".format(field))
            adata.obs[field] = adata.obs[field].astype(str).astype("category")
    return adata
def read_dataset(path, obs=None, var=None, obs_filter=None, var_filter=None, **keywords):
    """
    Read h5ad, loom, mtx, zarr, and txt formatted files

    NOTE(review): the previous docstring claimed 10X h5 and csv support, but
    only .txt, .h5ad, .loom, .mtx and .zarr are dispatched below.

    Parameters
    ----------
    path: str
        File name of data file.
    obs: {str, pd.DataFrame}
        Path to obs data file or a data frame
    var: {str, pd.DataFrame}
        Path to var data file or a data frame
    obs_filter {str, pd.DataFrame}
        File with one id per line, name of a boolean field in obs, or a list of ids
    var_filter: {str, pd.DataFrame}
        File with one id per line, name of a boolean field in obs, or a list of ids

    Returns
    -------
    Annotated data matrix.

    Raises
    ------
    ValueError
        If the file extension is not one of the supported formats.
    """
    _, ext = os.path.splitext(str(path).lower())
    if ext == '.txt':
        # Delimiter is sniffed (sep=None requires the python engine).
        df = pd.read_csv(path, engine='python', header=0, sep=None, index_col=0)
        adata = anndata.AnnData(X=df.values, obs=pd.DataFrame(index=df.index),
                                var=pd.DataFrame(index=df.columns))
    elif ext == '.h5ad':
        adata = anndata.read(path)
    elif ext == '.loom':
        adata = anndata.read_loom(path)
    elif ext == '.mtx':
        adata = anndata.read_mtx(path)
    elif ext == '.zarr':
        adata = anndata.read_zarr(path)
    else:
        raise ValueError('Unknown file format: {}'.format(ext))

    def get_df(meta):
        # Load a metadata table from a path (downloading gs:// URLs to a
        # temporary file first), or pass a DataFrame through unchanged.
        if not isinstance(meta, pd.DataFrame):
            tmp_path = None
            if meta.startswith('gs://'):
                tmp_path = download_gs_url(meta)
                meta = tmp_path
            meta = pd.read_csv(meta, sep=None, index_col='id', engine='python')
            if tmp_path is not None:
                os.remove(tmp_path)
        return meta

    if obs is not None:
        if not isinstance(obs, list) and not isinstance(obs, tuple):
            obs = [obs]
        for item in obs:
            adata.obs = adata.obs.join(get_df(item))
    if var is not None:
        if not isinstance(var, list) and not isinstance(var, tuple):
            var = [var]
        for item in var:
            adata.var = adata.var.join(get_df(item))
    return filter_adata(adata, obs_filter=obs_filter, var_filter=var_filter)
def __init__(self):
    """Load the Svensson Chromium control data and keep sample "20312"."""
    data_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "svensson_chromium_control.h5ad",
    )
    full = anndata.read(data_path)
    sample2 = full[full.obs.query('sample == "20312"').index]
    super(Sven2Dataset, self).__init__(sample2, select_genes_keywords=["ercc"])
def __init__(self, n_rna=100, threshold=0.01):
    """Load the Klein inDrops control (GSM1599501) shipped next to this module."""
    data_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'klein_indrops_control_GSM1599501.h5ad',
    )
    super(KleinNegativeControlDataset, self).__init__(
        anndata.read(data_path), n_rna=n_rna, threshold=threshold)
def __init__(self, n_rna=100, threshold=0.01):
    """Load the Svensson Chromium control and keep sample "20312"."""
    data_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'svensson_chromium_control.h5ad',
    )
    full = anndata.read(data_path)
    sample2 = full[full.obs.query('sample == "20312"').index]
    super(Svensson2NegativeControlDataset, self).__init__(
        sample2, n_rna=n_rna, threshold=threshold)
def __init__(self, n_rna=100, threshold=0.01):
    """Load the Zheng GemCode control data shipped next to this module."""
    data_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'zheng_gemcode_control.h5ad',
    )
    super(ZhengNegativeControlDataset, self).__init__(
        anndata.read(data_path), n_rna=n_rna, threshold=threshold)
def test_raw_rw(adata_raw, backing_h5ad):
    """A raw-carrying AnnData round-trips exactly through the backing file."""
    adata_raw.write(backing_h5ad)
    reread = ad.read(backing_h5ad)
    assert_equal(reread, adata_raw, exact=True)

    # The fixture itself must be untouched by the write.
    assert adata_raw.var_names.tolist() == ["var1", "var2"]
    assert adata_raw.raw.var_names.tolist() == ["var1", "var2", "var3"]
    assert adata_raw.raw[:, 0].X.tolist() == [[1], [4], [7]]
def main():
    """Command-line entry point for hashsolo cell-hashing demultiplexing.

    Fixes: an unrecognized input extension previously printed a message and
    then crashed with a NameError on the undefined ``cell_hashing_adata``;
    it now raises a clear ValueError. The JSON config file is read via a
    context manager, and the output directory is created race-free with
    ``os.makedirs(exist_ok=True)``.
    """
    usage = 'hashsolo'
    parser = ArgumentParser(usage, formatter_class=ArgumentDefaultsHelpFormatter)
    parser.add_argument(dest='data_file',
                        help='h5ad file containing cell hashing counts')
    parser.add_argument('-j', dest='model_json_file', default=None,
                        help='json file to pass optional arguments')
    parser.add_argument('-o', dest='out_dir', default='hashsolo_output',
                        help='Output directory for results')
    parser.add_argument('-p', dest='pre_existing_clusters', default=None,
                        help='column in cell_hashing_data_file.obs specifying '
                             'different cell types or clusters')
    parser.add_argument('-q', dest='plot_name', default='hashing_qc_plots.pdf',
                        help='name of plot to output')
    parser.add_argument('-n', dest='number_of_noise_barcodes', default=None,
                        help='Number of barcodes to use to create noise '
                             'distribution')
    args = parser.parse_args()

    # Optional parameter overrides supplied as JSON.
    params = {}
    if args.model_json_file is not None:
        with open(args.model_json_file) as model_json_open:
            params = json.load(model_json_open)

    data_file = args.data_file
    data_ext = os.path.splitext(data_file)[-1]
    if data_ext != '.h5ad':
        raise ValueError(
            f'Unrecognized file format: {data_ext!r} (expected .h5ad)')
    cell_hashing_adata = anndata.read(data_file)

    os.makedirs(args.out_dir, exist_ok=True)

    hashsolo(cell_hashing_adata,
             pre_existing_clusters=args.pre_existing_clusters,
             number_of_noise_barcodes=args.number_of_noise_barcodes,
             **params)
    cell_hashing_adata.write(os.path.join(args.out_dir, 'hashsoloed.h5ad'))
    plot_qc_checks_cell_hashing(
        cell_hashing_adata,
        fig_path=os.path.join(args.out_dir, args.plot_name))
def test_raw():
    """adata.raw keeps the full var space across slicing and a write/read cycle.

    Fix: the original wrote './test.h5ad' into the current working directory
    and never removed it; the round trip now uses a TemporaryDirectory that is
    always cleaned up.
    """
    import os
    import tempfile

    X = np.array(X_list)
    adata = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict, dtype='int32')
    # init raw
    adata.raw = adata
    assert adata.raw[:, 0].X.tolist() == [1, 4, 7]
    adata = adata[:, [0, 1]]
    assert adata.var_names.tolist() == ['var1', 'var2']
    assert adata.raw.var_names.tolist() == ['var1', 'var2', 'var3']
    # read write
    with tempfile.TemporaryDirectory() as tmpdir:
        path = os.path.join(tmpdir, 'test.h5ad')
        adata.write(path)
        adata = ad.read(path)
    assert adata.raw[:, 0].X.tolist() == [1, 4, 7]
    assert adata.raw.var_names.tolist() == ['var1', 'var2', 'var3']
    assert adata.var_names.tolist() == ['var1', 'var2']