def read_one_chunk(chunk_index):
    import gcsfs.mapping

    gcs = gcsfs.GCSFileSystem(gcs_project, token=gcs_token)
    store = gcsfs.mapping.GCSMap(gcs_path, gcs=gcs)
    adata = ad.read_zarr(store)
    return read_zarr_chunk(adata.X, chunk_size, chunk_index)
def read_adata(self, path):
    path_lc = path.lower()
    if path_lc.endswith('.loom'):
        return anndata.read_loom(path)
    elif path_lc.endswith('.zarr'):
        return anndata.read_zarr(path)
    elif path_lc.endswith('.tsv'):
        return read_star_fusion_file(path)
    elif path_lc.endswith('.rds'):  # Seurat, convert to h5ad
        h5_file = path + '.h5ad'
        import os
        if not os.path.exists(h5_file) or abs(os.path.getmtime(h5_file) - os.path.getmtime(path)) > 0.00001:
            import subprocess
            import pkg_resources
            import shutil
            print('Converting Seurat object')
            if os.path.exists(h5_file):
                os.remove(h5_file)
            subprocess.check_call(
                ['Rscript', pkg_resources.resource_filename("cirrocumulus", 'seurat2h5ad.R'), path, h5_file])
            shutil.copystat(path, h5_file)
        adata = anndata.read(h5_file, backed=self.backed)
        if adata.raw is not None and adata.shape[0] == adata.raw.shape[0]:
            print('Using adata.raw')
            adata = anndata.AnnData(X=adata.raw.X, var=adata.raw.var, obs=adata.obs, obsm=adata.obsm, uns=adata.uns)
        return adata
    return anndata.read(path, backed=self.backed)
def read_adata(path, spatial_directory=None, use_raw=False):
    if path.lower().endswith('.loom'):
        adata = anndata.read_loom(path)
    elif path.lower().endswith('.zarr'):
        adata = anndata.read_zarr(path)
    else:
        adata = anndata.read(path)
    if 'module' in adata.uns:
        adata.uns[ADATA_MODULE_UNS_KEY] = anndata.AnnData(
            X=adata.uns['module']['X'], var=adata.uns['module']['var'])
    if use_raw and adata.raw is not None and adata.shape[0] == adata.raw.shape[0]:
        logger.info('Using adata.raw')
        adata = anndata.AnnData(X=adata.raw.X, var=adata.raw.var, obs=adata.obs, obsm=adata.obsm, uns=adata.uns)
    if spatial_directory is not None:
        if not add_spatial(adata, spatial_directory):
            logger.info('No spatial data found in {}'.format(spatial_directory))
    for field in categorical_fields_convert:
        if field in adata.obs and not pd.api.types.is_categorical_dtype(adata.obs[field]):
            logger.info('Converting {} to categorical'.format(field))
            adata.obs[field] = adata.obs[field].astype(str).astype('category')
    return adata
def read_adata(path, spatial_directory=None, use_raw=False): if path.lower().endswith(".loom"): adata = anndata.read_loom(path) elif path.lower().endswith(".zarr"): adata = anndata.read_zarr(path) else: adata = anndata.read(path) if "module" in adata.uns: adata.uns[ADATA_MODULE_UNS_KEY] = anndata.AnnData( X=adata.uns["module"]["X"], var=adata.uns["module"]["var"] ) if use_raw and adata.raw is not None and adata.shape[0] == adata.raw.shape[0]: logger.info("Using adata.raw") adata = anndata.AnnData( X=adata.raw.X, var=adata.raw.var, obs=adata.obs, obsm=adata.obsm, uns=adata.uns ) if spatial_directory is not None: if not add_spatial(adata, spatial_directory): logger.info("No spatial data found in {}".format(spatial_directory)) for field in categorical_fields_convert: if field in adata.obs and not pd.api.types.is_categorical_dtype(adata.obs[field]): logger.info("Converting {} to categorical".format(field)) adata.obs[field] = adata.obs[field].astype(str).astype("category") return adata
def read_adata(self, filesystem, path):
    path_lc = path.lower()
    path_lc = path_lc.rstrip('/')
    if path_lc.endswith('.loom'):
        adata = anndata.read_loom(filesystem.open(path))
    elif path_lc.endswith('.zarr'):
        adata = anndata.read_zarr(filesystem.get_mapper(path))
    elif path_lc.endswith('.tsv'):
        adata = read_star_fusion_file(filesystem.open(path))
    elif path_lc.endswith('.rds'):  # Seurat, convert to h5ad
        h5_file = path + '.h5ad'
        import os
        if not os.path.exists(h5_file) or abs(os.path.getmtime(h5_file) - os.path.getmtime(path)) > 0.00001:
            import subprocess
            import pkg_resources
            import shutil
            print('Converting Seurat object')
            if os.path.exists(h5_file):
                os.remove(h5_file)
            subprocess.check_call(
                ['Rscript', pkg_resources.resource_filename("cirrocumulus", 'seurat2h5ad.R'), path, h5_file])
            shutil.copystat(path, h5_file)
        adata = anndata.read_h5ad(h5_file, backed='r' if self.backed else None)
        if adata.raw is not None and adata.shape[0] == adata.raw.shape[0]:
            print('Using adata.raw')
            adata = anndata.AnnData(X=adata.raw.X, var=adata.raw.var, obs=adata.obs, obsm=adata.obsm, uns=adata.uns)
    else:
        adata = anndata.read_h5ad(filesystem.open(path), backed='r' if self.backed else None)
    if 'module' in adata.uns:
        adata.uns[ADATA_MODULE_UNS_KEY] = anndata.AnnData(X=adata.uns['module']['X'], var=adata.uns['module']['var'])
    return adata
def adata_dist(self, sc, request):
    # regular anndata except for X, which we replace on the next line
    a = ad.read_zarr(input_file)
    input_file_X = input_file + "/X"
    if request.param == "direct":
        a.X = zappy.direct.from_zarr(input_file_X)
        yield a
    elif request.param == "executor":
        with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
            a.X = zappy.executor.from_zarr(executor, input_file_X)
            yield a
    elif request.param == "spark":
        a.X = zappy.spark.from_zarr(sc, input_file_X)
        yield a
    elif request.param == "dask":
        a.X = da.from_zarr(input_file_X)
        yield a
    elif request.param == "pywren":
        import s3fs.mapping
        s3 = s3fs.S3FileSystem()
        input_file_X = s3fs.mapping.S3Map("sc-tom-test-data/10x-10k-subset.zarr/X", s3=s3)
        executor = zappy.executor.PywrenExecutor()
        a.X = zappy.executor.from_zarr(executor, input_file_X)
        yield a
def test_readwrite_zarr(typ, tmp_path):
    X = typ(X_list)
    adata_src = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict)
    adata_src.raw = adata_src

    assert not is_categorical_dtype(adata_src.obs["oanno1"])
    adata_src.write_zarr(tmp_path / "test_zarr_dir", chunks=True)
    adata = ad.read_zarr(tmp_path / "test_zarr_dir")

    assert is_categorical_dtype(adata.obs["oanno1"])
    assert not is_categorical_dtype(adata.obs["oanno2"])
    assert adata.obs.index.tolist() == ["name1", "name2", "name3"]
    assert adata.obs["oanno1"].cat.categories.tolist() == ["cat1", "cat2"]
    assert adata.obs["oanno1c"].cat.categories.tolist() == ["cat1"]
    assert is_categorical_dtype(adata.raw.var["vanno2"])
    pd.testing.assert_frame_equal(adata.obs, adata_src.obs)
    pd.testing.assert_frame_equal(adata.var, adata_src.var)
    assert np.all(adata.var.index == adata_src.var.index)
    assert adata.var.index.dtype == adata_src.var.index.dtype
    assert type(adata.raw.X) is type(adata_src.raw.X)
    assert np.allclose(asarray(adata.raw.X), asarray(adata_src.raw.X))
    assert np.all(adata.raw.var == adata_src.raw.var)
    assert isinstance(adata.uns["uns4"]["a"], (int, np.integer))
    assert isinstance(adata_src.uns["uns4"]["a"], (int, np.integer))
    assert type(adata.uns["uns4"]["c"]) is type(adata_src.uns["uns4"]["c"])
    assert_equal(adata, adata_src)
def test_backwards_compat_files(archive_dir):
    with pytest.warns(ad.OldFormatWarning):
        from_h5ad = ad.read_h5ad(archive_dir / "adata.h5ad")
    with pytest.warns(ad.OldFormatWarning):
        from_zarr = ad.read_zarr(archive_dir / "adata.zarr.zip")

    assert_equal(from_h5ad, from_zarr, exact=True)
def read_dataset(path, obs=None, var=None, obs_filter=None, var_filter=None, **keywords):
    """
    Read h5ad, loom, mtx, zarr, and txt formatted files

    Parameters
    ----------
    path: str
        File name of data file.
    obs: {str, pd.DataFrame}
        Path to obs data file or a data frame
    var: {str, pd.DataFrame}
        Path to var data file or a data frame
    obs_filter: {str, pd.DataFrame}
        File with one id per line, name of a boolean field in obs, or a list of ids
    var_filter: {str, pd.DataFrame}
        File with one id per line, name of a boolean field in obs, or a list of ids

    Returns
    -------
    Annotated data matrix.
    """
    _, ext = os.path.splitext(str(path).lower())
    if ext == '.txt':
        df = pd.read_csv(path, engine='python', header=0, sep=None, index_col=0)
        adata = anndata.AnnData(X=df.values, obs=pd.DataFrame(index=df.index), var=pd.DataFrame(index=df.columns))
    elif ext == '.h5ad':
        adata = anndata.read(path)
    elif ext == '.loom':
        adata = anndata.read_loom(path)
    elif ext == '.mtx':
        adata = anndata.read_mtx(path)
    elif ext == '.zarr':
        adata = anndata.read_zarr(path)
    else:
        raise ValueError('Unknown file format: {}'.format(ext))

    def get_df(meta):
        if not isinstance(meta, pd.DataFrame):
            tmp_path = None
            if meta.startswith('gs://'):
                tmp_path = download_gs_url(meta)
                meta = tmp_path
            meta = pd.read_csv(meta, sep=None, index_col='id', engine='python')
            if tmp_path is not None:
                os.remove(tmp_path)
        return meta

    if obs is not None:
        if not isinstance(obs, list) and not isinstance(obs, tuple):
            obs = [obs]
        for item in obs:
            adata.obs = adata.obs.join(get_df(item))
    if var is not None:
        if not isinstance(var, list) and not isinstance(var, tuple):
            var = [var]
        for item in var:
            adata.var = adata.var.join(get_df(item))

    return filter_adata(adata, obs_filter=obs_filter, var_filter=var_filter)
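# A minimal usage sketch for read_dataset as documented above; the file names are
# illustrative only, and the obs CSV is assumed to have an 'id' column, since
# get_df reads metadata with index_col='id'.
adata = read_dataset('counts.h5ad', obs='cell_metadata.csv', obs_filter='selected_barcodes.txt')
print(adata.shape, adata.obs.columns.tolist())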
def test_write_zarr(self):
    log1p(self.adata_rdd)
    output_file_zarr = tmp_dir()
    self.adata_rdd.write_zarr(output_file_zarr, chunks=(2, 5))
    # read back as zarr (without using RDDs) and check it is the same as self.adata.X
    adata_log1p = ad.read_zarr(output_file_zarr)
    log1p(self.adata)
    self.assertTrue(np.array_equal(adata_log1p.X, self.adata.X))
def from_zarr(cls, sc, zarr_file):
    """
    Read a Zarr file as an anndata object (for the metadata) and with the
    data matrix (X) as an RDD of numpy arrays.
    """
    adata = ad.read_zarr(zarr_file)
    chunk_size = zarr.open(zarr_file, mode='r')['X'].chunks
    return cls._from_anndata(sc, adata, chunk_size, read_chunk_zarr(zarr_file, chunk_size))
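# A hedged usage sketch for the from_zarr classmethod above: AnnDataRdd is a
# placeholder name for the RDD-backed wrapper class it is assumed to live on,
# and sc is assumed to be an existing SparkContext; log1p and write_zarr mirror
# the test usage shown elsewhere in this collection.
adata_rdd = AnnDataRdd.from_zarr(sc, '10x.zarr')
log1p(adata_rdd)
adata_rdd.write_zarr(tmp_dir(), chunks=(2, 5))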
def test_readwrite_zarr(typ, tmp_path):
    X = typ(X_list)
    adata = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict)
    assert pd.api.types.is_string_dtype(adata.obs['oanno1'])
    adata.write_zarr(tmp_path / 'test_zarr_dir', chunks=True)
    adata = ad.read_zarr(tmp_path / 'test_zarr_dir')
    assert pd.api.types.is_categorical(adata.obs['oanno1'])
    assert pd.api.types.is_string_dtype(adata.obs['oanno2'])
    assert adata.obs.index.tolist() == ['name1', 'name2', 'name3']
    assert adata.obs['oanno1'].cat.categories.tolist() == ['cat1', 'cat2']
def colsum(zarr_file):
    print("Running colsum for %s" % zarr_file)
    client = Client()
    gcs = gcsfs.GCSFileSystem('hca-scale', token='cloud')
    store = gcsfs.mapping.GCSMap(zarr_file, gcs=gcs)
    adata = ad.read_zarr(store)
    adata.X = da.from_zarr(store, component='X')
    s = np.sum(adata.X, axis=0)
    s.compute()
def test_h5ad_to_zarr(self, h5ad_file, tmpdir):
    p = tmpdir.join("filtered_gene_bc_matrices.zarr")
    input = h5ad_file
    output = str(p)
    convert(input, output)
    # read back and check a few things
    adata = read_zarr(output)
    assert adata.X.shape == (2700, 32738)
    assert adata.obs.shape == (2700, 0)
    assert adata.var.shape == (32738, 1)
def test_zarr_chunk_X(tmp_path):
    import zarr

    zarr_pth = Path(tmp_path) / "test.zarr"
    adata = gen_adata((100, 100), X_type=np.array)
    adata.write_zarr(zarr_pth, chunks=(10, 10))

    z = zarr.open(str(zarr_pth))  # As of v2.3.2 zarr won't take a Path
    assert z["X"].chunks == (10, 10)
    from_zarr = ad.read_zarr(zarr_pth)
    assert_equal(from_zarr, adata)
def from_zarr_gcs(cls, sc, gcs_path, gcs_project, gcs_token='cloud'):
    """
    Read a Zarr file from GCS as an anndata object (for the metadata) and
    with the data matrix (X) as an RDD of numpy arrays.
    """
    import gcsfs.mapping

    gcs = gcsfs.GCSFileSystem(gcs_project, token=gcs_token)
    store = gcsfs.mapping.GCSMap(gcs_path, gcs=gcs)
    adata = ad.read_zarr(store)
    chunk_size = zarr.open(store, mode='r')['X'].chunks
    return cls._from_anndata(
        sc, adata, chunk_size, read_chunk_zarr_gcs(gcs_path, chunk_size, gcs_project, gcs_token))
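# A corresponding sketch for the GCS variant, under the same assumptions as above
# (placeholder AnnDataRdd class, existing SparkContext sc) plus an accessible
# bucket path and GCP project; the token defaults to 'cloud'.
adata_rdd = AnnDataRdd.from_zarr_gcs(sc, 'my-bucket/10x.zarr', 'my-gcp-project')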
def read_adata(self, filesystem, path): path_lc = path.lower() path_lc = path_lc.rstrip("/") if path_lc.endswith(".loom"): adata = anndata.read_loom(filesystem.open(path)) elif path_lc.endswith(".zarr"): adata = anndata.read_zarr(filesystem.get_mapper(path)) elif path_lc.endswith(".tsv"): adata = read_star_fusion_file(filesystem.open(path)) elif path_lc.endswith(".rds"): # Seurat, convert to h5ad h5_file = path + ".h5ad" import os if (not os.path.exists(h5_file) or abs(os.path.getmtime(h5_file) - os.path.getmtime(path)) > 0.00001): import shutil import subprocess import pkg_resources print("Converting Seurat object") if os.path.exists(h5_file): os.remove(h5_file) subprocess.check_call([ "Rscript", pkg_resources.resource_filename("cirrocumulus", "seurat2h5ad.R"), path, h5_file, ]) shutil.copystat(path, h5_file) adata = anndata.read_h5ad(h5_file, backed="r" if self.backed else None) if adata.raw is not None and adata.shape[0] == adata.raw.shape[0]: print("Using adata.raw") adata = anndata.AnnData(X=adata.raw.X, var=adata.raw.var, obs=adata.obs, obsm=adata.obsm, uns=adata.uns) else: if self.backed: adata = anndata.read_h5ad(path, backed="r") else: adata = anndata.read_h5ad(filesystem.open(path)) if "module" in adata.uns: adata.uns[ADATA_MODULE_UNS_KEY] = anndata.AnnData( X=adata.uns["module"]["X"], var=adata.uns["module"]["var"]) return adata
def adata_dist(self, request):
    # regular anndata except for X, which we replace on the next line
    a = ad.read_zarr(input_file)
    input_file_X = input_file + "/X"
    if request.param == "direct":
        import zappy.direct

        a.X = zappy.direct.from_zarr(input_file_X)
        yield a
    elif request.param == "dask":
        import dask.array as da

        a.X = da.from_zarr(input_file_X)
        yield a
def test_write_zarr(self, adata, adata_dist):
    log1p(adata_dist)
    temp_store = zarr.TempStore()
    chunks = adata_dist.X.chunks
    # write metadata using regular anndata
    adata.write_zarr(temp_store, chunks)
    if isinstance(adata_dist.X, da.Array):
        adata_dist.X.to_zarr(temp_store.dir_path("X"))
    else:
        adata_dist.X.to_zarr(temp_store.dir_path("X"), chunks)
    # read back as zarr (without using RDDs) and check it is the same as adata.X
    adata_log1p = ad.read_zarr(temp_store)
    log1p(adata)
    npt.assert_allclose(adata_log1p.X, adata.X)
def test_backwards_compat_zarr():
    import scanpy as sc
    import zarr

    pbmc_orig = sc.datasets.pbmc68k_reduced()
    # Old zarr writer couldn't do sparse arrays
    pbmc_orig.raw._X = pbmc_orig.raw.X.toarray()
    del pbmc_orig.uns["neighbors"]

    # This was written out with anndata=0.6.22.post1
    zarrpth = HERE / "data/pbmc68k_reduced_legacy.zarr.zip"
    with zarr.ZipStore(zarrpth, mode="r") as z:
        pbmc_zarr = ad.read_zarr(z)

    assert_equal(pbmc_zarr, pbmc_orig)
def test_readwrite_equivalent_h5ad_zarr(typ):
    tmpdir = tempfile.TemporaryDirectory()
    tmpdirpth = Path(tmpdir.name)
    h5ad_pth = tmpdirpth / "adata.h5ad"
    zarr_pth = tmpdirpth / "adata.zarr"

    M, N = 100, 101
    adata = gen_adata((M, N), X_type=typ)
    adata.raw = adata

    adata.write_h5ad(h5ad_pth)
    adata.write_zarr(zarr_pth)
    from_h5ad = ad.read_h5ad(h5ad_pth)
    from_zarr = ad.read_zarr(zarr_pth)

    assert_equal(from_h5ad, from_zarr, exact=True)
def test_write_zarr(self, adata, adata_dist):
    import dask.array as da
    import zarr

    log1p(adata_dist)
    temp_store = zarr.TempStore()
    chunks = adata_dist.X.chunks
    if isinstance(chunks[0], tuple):
        chunks = (chunks[0][0],) + chunks[1]
    # write metadata using regular anndata
    adata.write_zarr(temp_store, chunks)
    if isinstance(adata_dist.X, da.Array):
        adata_dist.X.to_zarr(temp_store.dir_path("X"), overwrite=True)
    else:
        adata_dist.X.to_zarr(temp_store.dir_path("X"), chunks)
    # read back as zarr directly and check it is the same as adata.X
    adata_log1p = ad.read_zarr(temp_store)
    log1p(adata)
    npt.assert_allclose(adata_log1p.X, adata.X)
def test_readwrite_zarr(typ, tmp_path):
    X = typ(X_list)
    adata_src = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict)
    adata_src.raw = adata_src

    assert not is_categorical(adata_src.obs['oanno1'])
    adata_src.write_zarr(tmp_path / 'test_zarr_dir', chunks=True)
    adata = ad.read_zarr(tmp_path / 'test_zarr_dir')

    assert is_categorical(adata.obs['oanno1'])
    assert not is_categorical(adata.obs['oanno2'])
    assert adata.obs.index.tolist() == ['name1', 'name2', 'name3']
    assert adata.obs['oanno1'].cat.categories.tolist() == ['cat1', 'cat2']
    assert is_categorical(adata.raw.var['vanno2'])
    assert np.all(adata.obs == adata_src.obs)
    assert np.all(adata.var == adata_src.var)
    assert np.all(adata.var.index == adata_src.var.index)
    assert adata.var.index.dtype == adata_src.var.index.dtype
    assert type(adata.raw.X) == type(adata_src.raw.X)
    assert np.allclose(asarray(adata.raw.X), asarray(adata_src.raw.X))
    assert np.all(adata.raw.var == adata_src.raw.var)
    assert isinstance(adata.uns["uns4"]["a"], (int, np.integer))
    assert isinstance(adata_src.uns["uns4"]["a"], (int, np.integer))
    assert type(adata.uns["uns4"]["c"]) == type(adata_src.uns["uns4"]["c"])
    assert_equal(adata, adata_src)
def test_zarr_compression(tmp_path):
    from numcodecs import Blosc

    pth = str(Path(tmp_path) / "adata.zarr")
    adata = gen_adata((10, 8))
    compressor = Blosc(cname="zstd", clevel=3, shuffle=Blosc.BITSHUFFLE)
    not_compressed = []

    ad._io.write_zarr(pth, adata, compressor=compressor)

    def check_compressed(key, value):
        if isinstance(value, zarr.Array) and value.shape != ():
            if value.compressor != compressor:
                not_compressed.append(key)

    with zarr.open(str(pth), "r") as f:
        f.visititems(check_compressed)

    if not_compressed:
        msg = "\n\t".join(not_compressed)
        raise AssertionError(f"These elements were not compressed correctly:\n\t{msg}")

    assert_equal(adata, ad.read_zarr(pth))
executor = zappy.executor.PywrenExecutor(
    live_viewer=True,
    exclude_modules=None,
    ignore_modules=[
        'dash', 'dash_html_components', 'dash_core_components', 'dask',
        'google_auth_oauthlib', 'pandas', 'pytest'
    ])
s3 = s3fs.S3FileSystem()
if s3.exists('sc-tom-test-data/10x-log1p.zarr'):
    s3.rm('sc-tom-test-data/10x-log1p.zarr', recursive=True)
input_zarr = s3fs.mapping.S3Map('sc-tom-test-data/10x/anndata_zarr_2000/10x.zarr', s3=s3)
input_zarr_X = s3fs.mapping.S3Map('sc-tom-test-data/10x/anndata_zarr_2000/10x.zarr/X', s3=s3)
intermediate_zarr = s3fs.mapping.S3Map('sc-tom-test-data/intermediate.zarr', s3=s3)
output_zarr = s3fs.mapping.S3Map('sc-tom-test-data/10x-log1p.zarr', s3=s3)

# regular anndata except for X
adata = ad.read_zarr(input_zarr)
adata.X = zappy.executor.from_zarr(executor, input_zarr_X, intermediate_store=intermediate_zarr)

recipe_zheng17(adata)

adata.X.to_zarr(output_zarr, adata.X.chunks)
def handle_job():
    email = get_auth().auth()["email"]
    database_api = get_database()
    if request.method == "DELETE":
        content = request.get_json(force=True, cache=False)
        job_id = content.get("id", "")
        database_api.delete_job(email, job_id)
        delete_job(job_id)
        return json_response("", 204)
    elif request.method == "POST":
        if os.environ.get("GAE_APPLICATION") is None:  # TODO
            content = request.get_json(force=True, cache=False)
            email, dataset = get_email_and_dataset(content)
            params = content.get("params")
            job_type = content.get("type")
            job_name = content.get("name")
            return dict(id=submit_job(
                database_api=database_api,
                dataset_api=dataset_api,
                email=email,
                dataset=dataset,
                job_name=job_name,
                job_type=job_type,
                params=params,
            ))
        else:
            raise ValueError("Submit job not supported on GAE")
    else:
        job_id = request.args["id"]
        c = request.args["c"]
        is_precomputed = job_id.startswith("cirro-")
        if c == "status" or c == "params":
            if is_precomputed:
                job = dict(status="complete") if c == "status" else dict()
            else:
                job = database_api.get_job(email=email, job_id=job_id, return_type=c)
            if job is None:
                return json_response("", 404)  # job deleted
            return json_response(job, 200)
        if c != "result":
            raise ValueError("c must be one of status, params, or result")
        if is_precomputed:  # precomputed result
            email = get_auth().auth()["email"]
            suggested_dataset_id = request.args["ds"]
            dataset = database_api.get_dataset(email, suggested_dataset_id)
            # precomputed results need to be a child of dataset
            dataset["url"] = map_url(dataset["url"])
            job_result = dataset_api.get_result(dataset, job_id)
            if get_scheme(job_result) == "file" and not os.path.exists(job_result):
                return Response(job_result, content_type="application/json")
            else:
                return send_file(job_result)
        job = database_api.get_job(email=email, job_id=job_id, return_type=c)
        if job is None:
            return json_response("", 404)  # job deleted
        import anndata

        if isinstance(job, dict) and "url" in job:
            url = job["url"]
            content_type = job.get("content-type")
            if content_type == "application/h5ad" or content_type == "application/zarr":
                if content_type == "application/h5ad":
                    with get_fs(url).open(url, mode="rb") as f:
                        adata = anndata.read(f)
                else:
                    adata = anndata.read_zarr(get_fs(url).get_mapper(url))
                adata_df = adata_to_df(adata)
                return Response(
                    adata_df.to_json(double_precision=2, orient="records"),
                    content_type="application/json",
                )
            else:  # URL to JSON or text
                return send_file(url)
        elif isinstance(job, dict):
            return json_response(job)
        elif isinstance(job, anndata.AnnData):
            return Response(
                adata_to_df(job).to_json(double_precision=2, orient="records"),
                content_type="application/json",
            )
        return job
def adata(self):
    a = ad.read_zarr(input_file)  # regular anndata
    a.X = a.X[:]  # convert to numpy array
    return a
# pip3 uninstall -y anndata
# pip3 install git+https://github.com/tomwhite/anndata@zarr
# pip3 install gcsfs
# pip3 list

import anndata as ad
import gcsfs.mapping
from scanpy.api.pp import log1p
from scanpy.api.pp import recipe_zheng17

gcs = gcsfs.GCSFileSystem('hca-scale', token='cloud')
store = gcsfs.mapping.GCSMap('ll-sc-data-bkup/10x/anndata_zarr_2000/10x.zarr', gcs=gcs)
#store = gcsfs.mapping.GCSMap('ll-sc-data-bkup/10x/10x-10k-subset.zarr', gcs=gcs)
output = gcsfs.mapping.GCSMap('ll-sc-data-bkup/10x/anndata_zarr_out/10x.zarr', gcs=gcs)
adata = ad.read_zarr(store)

import time
start = time.time()
adata.X = adata.X[:]  # materialize in memory since Zarr doesn't support all the operations scanpy calls
recipe_zheng17(adata)
adata.write_zarr(output, chunks=(2000, adata.n_vars))
end = time.time()
print(end - start)
# 1080.5862543582916
# This is 18 minutes - note that it *doesn't* write back to cloud storage
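# A hedged follow-up to the script above (not part of the original timing run):
# read the written store back through the same GCS mapping to confirm it round-trips.
adata_out = ad.read_zarr(output)
print(adata_out.shape)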
def read_one_chunk(chunk_index):
    adata = ad.read_zarr(zarr_file)
    return read_adata_chunk(adata, chunk_size, chunk_index)
def handle_job():
    email = get_auth().auth()['email']
    database_api = get_database()
    if request.method == 'DELETE':
        content = request.get_json(force=True, cache=False)
        job_id = content.get('id', '')
        database_api.delete_job(email, job_id)
        delete_job(job_id)
        return json_response('', 204)
    elif request.method == 'POST':
        if os.environ.get('GAE_APPLICATION') is None:  # TODO
            content = request.get_json(force=True, cache=False)
            email, dataset = get_email_and_dataset(content)
            params = content.get('params')
            job_type = content.get('type')
            job_name = content.get('name')
            return dict(id=submit_job(database_api=database_api, dataset_api=dataset_api, email=email,
                                      dataset=dataset, job_name=job_name, job_type=job_type, params=params))
        else:
            raise ValueError('Submit job not supported on GAE')
    else:
        job_id = request.args['id']
        c = request.args['c']
        is_precomputed = job_id.startswith('cirro-')
        if c == 'status' or c == 'params':
            if is_precomputed:
                job = dict(status='complete') if c == 'status' else dict()
            else:
                job = database_api.get_job(email=email, job_id=job_id, return_type=c)
            if job is None:
                return json_response('', 404)  # job deleted
            return json_response(job, 200)
        if c != 'result':
            raise ValueError('c must be one of status, params, or result')
        if is_precomputed:  # precomputed result
            email = get_auth().auth()['email']
            suggested_dataset_id = request.args['ds']
            dataset = database_api.get_dataset(email, suggested_dataset_id)
            # precomputed results need to be a child of dataset
            dataset['url'] = map_url(dataset['url'])
            job_result = dataset_api.get_result(dataset, job_id)
            if get_scheme(job_result) == 'file' and not os.path.exists(job_result):
                return Response(job_result, content_type='application/json')
            else:
                return send_file(job_result)
        job = database_api.get_job(email=email, job_id=job_id, return_type=c)
        if job is None:
            return json_response('', 404)  # job deleted
        import anndata
        if isinstance(job, dict) and 'url' in job:
            url = job['url']
            content_type = job.get('content-type')
            if content_type == 'application/h5ad' or content_type == 'application/zarr':
                if content_type == 'application/h5ad':
                    with get_fs(url).open(url, mode='rb') as f:
                        adata = anndata.read(f)
                else:
                    adata = anndata.read_zarr(get_fs(url).get_mapper(url))
                adata_df = adata_to_df(adata)
                return Response(adata_df.to_json(double_precision=2, orient='records'),
                                content_type='application/json')
            else:  # URL to JSON or text
                return send_file(url)
        elif isinstance(job, dict):
            return json_response(job)
        elif isinstance(job, anndata.AnnData):
            return Response(adata_to_df(job).to_json(double_precision=2, orient='records'),
                            content_type='application/json')
        return job