示例#1
0
def test_de_backed(sparse, file_format, tmp_path):
    """Run DE against data read back from an on-disk dataset.

    The example data is passed through ``PrepareData`` with ``tmp_path`` as
    the output directory, then fetched batch by batch through the reader
    matching ``file_format`` and fed to a one-vs-rest ``DE`` run. The first
    entry of ``pair2results`` is handed to ``diff_results`` for checking.

    Args:
        sparse: Forwarded unchanged to ``get_example_data``.
        file_format: Output format; ``'parquet'`` and ``'zarr'`` are the
            only values this test has a reader for.
        tmp_path: Directory used as the ``PrepareData`` output location.

    Raises:
        ValueError: If ``file_format`` has no matching reader.
    """
    fs = fsspec.filesystem('file')
    adata = get_example_data(sparse)
    output_dir = str(tmp_path)
    prepare_data = PrepareData(datasets=[adata],
                               output=output_dir,
                               output_format=file_format)
    prepare_data.execute()

    if file_format == 'parquet':
        reader = ParquetDataset()
    elif file_format == 'zarr':
        reader = ZarrDataset()
    else:
        # Without this branch `reader` would stay unbound and the failure
        # would only surface later as a NameError inside get_batch_fn.
        raise ValueError(
            'Unsupported file_format: {!r}'.format(file_format))

    batch_size = 30
    obs_field = 'sc_groups'
    nfeatures = adata.shape[1]

    def get_batch_fn(i):
        # Clamp the end of the slice so the last batch stops at nfeatures.
        end = min(nfeatures, i + batch_size)
        return reader.read_dataset(filesystem=fs,
                                   path=output_dir,
                                   dataset=dict(id=''),
                                   keys=dict(X=[slice(i, end)]))

    results = DE(series=adata.obs[obs_field],
                 nfeatures=nfeatures,
                 batch_size=batch_size,
                 get_batch_fn=get_batch_fn,
                 base=get_base(adata),
                 one_vs_rest=True)
    diff_results(adata, obs_field, results.pair2results[0])
示例#2
0
def test_de_2_groups(sparse):
    """Run one-vs-rest DE on the in-memory example data, three features at a time.

    Batches are column slices of the example AnnData object. The first entry
    of ``pair2results`` is passed to ``diff_results`` together with the data
    and the grouping column name.

    Args:
        sparse: Forwarded unchanged to ``get_example_data``.
    """
    adata = get_example_data(sparse)
    step = 3
    group_key = 'sc_groups'
    n_vars = adata.shape[1]

    def feature_batch(start):
        # Clamp the stop index so the final batch ends at the last feature.
        stop = min(n_vars, start + step)
        return adata[:, start:stop]

    de = DE(series=adata.obs[group_key],
            nfeatures=n_vars,
            batch_size=step,
            get_batch_fn=feature_batch,
            base=get_base(adata),
            one_vs_rest=True)
    diff_results(adata, group_key, de.pair2results[0])
示例#3
0
def test_de_4_groups(sparse):
    """Run DE on two concatenated copies of the example data.

    The second copy has its ``sc_groups`` values 0 and 1 replaced by 2 and 3
    before concatenation, and the combined column is cast to a categorical.
    Entries 0 through 3 of ``pair2results`` are each passed to
    ``diff_results`` along with the index rendered as a string.

    Args:
        sparse: Forwarded unchanged to ``get_example_data``.
    """
    first = get_example_data(sparse)
    second = get_example_data(sparse)
    # Relabel the second copy so its groups do not collide with the first.
    relabelled = second.obs['sc_groups'].replace({0: 2, 1: 3})
    second.obs['sc_groups'] = relabelled

    merged = anndata.concat((first, second))
    merged.obs_names_make_unique()

    step = 3
    group_key = 'sc_groups'
    merged.obs[group_key] = merged.obs[group_key].astype('category')
    n_vars = merged.shape[1]

    def feature_batch(start):
        # Clamp the stop index so the final batch ends at the last feature.
        stop = min(n_vars, start + step)
        return merged[:, start:stop]

    de = DE(series=merged.obs[group_key],
            nfeatures=n_vars,
            batch_size=step,
            get_batch_fn=feature_batch,
            base=get_base(merged))

    for pair_index in range(4):
        diff_results(merged, group_key,
                     de.pair2results[pair_index], str(pair_index))