Python read_input示例，pegasus.read_input Python示例

示例#1

0

显示文件

文件： test_aggregate.py 项目： nealpsmith/pegasus

    def test_aggregate_10x_matrices(self):
        """Aggregate two 10x heart matrices and compare against the inputs.

        Both inputs are read with ``genome="mm10"``, ``pg.aggregate_matrices``
        is run on the sample sheet, and ``aggregate_test.h5sc`` is read back.
        The aggregated matrix must have the summed row count, the same column
        count, identical values for each input's row range, and barcodes that
        start with the sample name.
        """
        v3 = pg.read_input(
            "tests/pegasus-test-data/input/heart_1k_v3/filtered_feature_bc_matrix.h5",
            genome="mm10",
        )
        v2 = pg.read_input(
            "tests/pegasus-test-data/input/heart_1k_v2/filtered_gene_bc_matrices_h5.h5",
            genome="mm10",
        )
        pg.aggregate_matrices(
            "tests/pegasus-test-data/input/aggregate_test.csv",
            what_to_return='aggregate_test',
        )
        merged = pg.read_input("aggregate_test.h5sc", genome="mm10")

        n_v3 = v3.shape[0]
        n_v2 = v2.shape[0]
        self.assertEqual(n_v3 + n_v2, merged.shape[0],
                         "Cell dimension is incorrect")
        self.assertEqual(v3.shape[1], merged.shape[1],
                         "Feature dimension is incorrect")

        # Rows of the first input come first, followed by the second input.
        v3_rows = merged[list(range(n_v3)), :]
        v2_rows = merged[list(range(n_v3, n_v3 + n_v2)), :]

        for subset, source in ((v3_rows, v3), (v2_rows, v2)):
            self.assertEqual((subset.X != source.X).sum(), 0, "Values differ")

        for subset, sample in ((v3_rows, "heart_1k_v3"),
                               (v2_rows, "heart_1k_v2")):
            first_barcode = subset.obs.index.values[0]
            self.assertTrue(first_barcode.startswith(sample),
                            "Prefix not added")

示例#2

0

显示文件

 def test_read_write_h5ad(self):
     """Write the hgmm_1k_v3 input to ``test.h5ad`` and check the round trip."""
     h5ad_path = "test.h5ad"
     original = pg.read_input(
         "tests/pegasus-test-data/input/hgmm_1k_v3_filtered_feature_bc_matrix/"
     )
     pg.write_output(original, h5ad_path)
     reloaded = pg.read_input(h5ad_path)
     assert_adata_equal(self, original, reloaded)

示例#3

0

显示文件

 def test_read_write_old_5ad_backed_whitelist(self):
     """Rewrite a copied h5ad opened with ``h5ad_mode="r+"`` and re-read it.

     The fixture is copied into the working directory first so the write
     (restricted by ``whitelist=["obs"]``) never touches the original file.
     """
     working_copy = "test_obsm_compound.h5ad"
     shutil.copy(
         "tests/pegasus-test-data/input/test_obsm_compound.h5ad",
         working_copy,
     )
     backed = pg.read_input(working_copy, h5ad_mode="r+")
     pg.write_output(backed, working_copy, whitelist=["obs"])
     reloaded = pg.read_input(working_copy)
     assert_adata_equal(self, backed, reloaded)

示例#4

0

显示文件

    def test_output(self):
        """Check that the h5ad and loom outputs match ``self.data`` in shape.

        Also verifies that ``result.log`` is present in the ``tests`` folder.
        """
        shape_checks = (
            ("tests/result.mm10-rna.h5ad",
             "H5AD format's shape is inconsistent!"),
            ("tests/result.mm10-rna.loom",
             "Loom format's shape is inconsistent!"),
        )
        for path, message in shape_checks:
            loaded = pg.read_input(path)
            self.assertEqual(self.data.shape, loaded.shape, message)

        test_dir_entries = os.listdir('tests')
        self.assertIn('result.log', test_dir_entries,
                      'Clustering log is lost!')

示例#5

0

显示文件

    def test_csv(self):
        """Read a small CSV back and compare it with the frame that wrote it.

        The transposed result must reproduce the frame's values, index and
        columns; chunked reads with chunk sizes 1 through 4 must equal the
        unchunked read.
        """
        csv_path = "test.csv"
        frame = pd.DataFrame(index=["a", "b", "c"],
                             data=dict(a=[1, 2, 3], b=[4, 5, 6]))
        frame.to_csv(csv_path)

        reference = pg.read_input(csv_path, genome="test").T
        np.testing.assert_array_equal(frame.values, reference.X.toarray())
        np.testing.assert_array_equal(frame.index.values,
                                      reference.obs.index.values)
        np.testing.assert_array_equal(frame.columns.values,
                                      reference.var.index.values)

        for chunk_size in range(1, 5):
            chunked = pg.read_input(csv_path,
                                    genome="test",
                                    chunk_size=chunk_size).T
            assert_adata_equal(self, reference, chunked)

示例#6

0

显示文件

 def test_write_mtx(self):
     """Write a matrix as mtx and rebuild it from the written directory.

     A ``test`` column is added to both ``var`` and ``obs`` before writing.
     After reading the directory back, the extra annotations are restored
     from ``obs.csv.gz`` / ``var.csv.gz`` and the result is compared with the
     original, ignoring the ``Channel`` obs column.
     """
     original = pg.read_input(
         "tests/pegasus-test-data/input/heart_1k_v3/filtered_feature_bc_matrix.h5"
     )
     original.var['test'] = 1.0
     original.obs['test'] = 1.0

     mtx_dir = 'test_mtx/mm10'
     pg.write_output(original, os.path.join(mtx_dir, 'matrix.mtx.gz'))

     reloaded = pg.read_input(mtx_dir)
     # Channel is dropped here because it comes back via the obs CSV join.
     del reloaded.obs['Channel']

     obs_extra = pd.read_csv(os.path.join(mtx_dir, 'obs.csv.gz'),
                             index_col=0)
     reloaded.obs = reloaded.obs.join(obs_extra)

     var_extra = pd.read_csv(os.path.join(mtx_dir, 'var.csv.gz'),
                             index_col=0)
     reloaded.var = reloaded.var.join(var_extra)
     del reloaded.var['featuretype']

     assert_adata_equal(self, original, reloaded, obs_blacklist=['Channel'])

示例#7

0

显示文件

def read_dataset(path,
                 obs=None,
                 var=None,
                 obs_filter=None,
                 var_filter=None,
                 **keywords):
    """
    Read a data file, attach optional obs/var metadata, and filter the result.

    Paths ending in ``.txt`` (case-insensitive) are parsed with pandas
    (separator auto-detected, first row as header, first column as index) and
    wrapped in an ``anndata.AnnData``. Every other path is handed to
    ``pg.read_input`` (h5ad, loom, mtx, 10X h5, and csv formatted files).

    Parameters
    ----------
    path: str
        File name of data file.
    obs: {str, pd.DataFrame, list, tuple}
        Path to an obs data file, a data frame, or a list/tuple of either.
        Each item is joined onto ``adata.obs`` in order. Files are read with
        the ``id`` column as index; ``gs://`` paths are first downloaded with
        ``download_gs_url`` and the downloaded copy is removed afterwards.
    var: {str, pd.DataFrame, list, tuple}
        Same as ``obs`` but joined onto ``adata.var``.
    obs_filter: {str, pd.DataFrame}
        File with one id per line, name of a boolean field in obs, or a list
        of ids. Forwarded unchanged to ``filter_adata``.
    var_filter: {str, pd.DataFrame}
        File with one id per line, name of a boolean field, or a list of ids.
        Forwarded unchanged to ``filter_adata``.
        NOTE(review): the previous text said "field in obs" here, presumably
        a copy-paste of ``obs_filter`` -- confirm against ``filter_adata``.
    **keywords
        Extra keyword arguments forwarded to ``pg.read_input``; they are not
        used for ``.txt`` input.

    Returns
    -------
    Annotated data matrix, as returned by ``filter_adata``.
    """
    if str(path).lower().endswith('.txt'):
        df = pd.read_csv(path,
                         engine='python',
                         header=0,
                         sep=None,
                         index_col=0)
        adata = anndata.AnnData(X=df.values,
                                obs=pd.DataFrame(index=df.index),
                                var=pd.DataFrame(index=df.columns))
    else:
        adata = pg.read_input(path, **keywords)

    def get_df(meta):
        # Data frames are used as-is; anything else is treated as a path/URL.
        if isinstance(meta, pd.DataFrame):
            return meta
        tmp_path = None
        if meta.startswith('gs://'):
            tmp_path = download_gs_url(meta)
            meta = tmp_path
        try:
            return pd.read_csv(meta,
                               sep=None,
                               index_col='id',
                               engine='python')
        finally:
            # Remove the downloaded copy even when parsing fails, so a bad
            # metadata file does not leave a temporary file behind.
            if tmp_path is not None:
                os.remove(tmp_path)

    if obs is not None:
        if not isinstance(obs, (list, tuple)):
            obs = [obs]
        for item in obs:
            adata.obs = adata.obs.join(get_df(item))
    if var is not None:
        if not isinstance(var, (list, tuple)):
            var = [var]
        for item in var:
            adata.var = adata.var.join(get_df(item))

    return filter_adata(adata, obs_filter=obs_filter, var_filter=var_filter)

示例#8

0

显示文件

文件： test_hashing_citeseq.py 项目： michael-alperovich/pegasus

 def test_demux(self):
     """Check the demultiplexing result file and its side outputs in ``tests``."""
     demux = pg.read_input("tests/cb_cc_demux.zarr.zip")
     self.assertEqual(demux.shape, (737280, 33694),
                      "Demux data shape differs!")

     column_checks = (
         ('demux_type', "Demux type is lost!"),
         ('assignment', "Cell assignment is lost!"),
     )
     for column, message in column_checks:
         self.assertIn(column, demux.obs.columns, message)

     plot_files = glob.glob("tests/cb_cc.*.pdf")
     self.assertEqual(len(plot_files), 4,
                      "Demux diagnosis plots are missing!")

     test_dir_entries = os.listdir('tests')
     self.assertIn('cb_cc.out.demuxEM.zarr.zip', test_dir_entries,
                   "Demultiplexed RNA matrix is lost!")

示例#9

0

显示文件

文件： test_hashing_citeseq.py 项目： michael-alperovich/pegasus

 def test_citeseq(self):
     """Check modalities, obs columns and shapes of the CITE-Seq result."""
     citeseq = pg.read_input("tests/cb_cc_citeseq.zarr.zip")

     expected_modalities = {'GRCh38-citeseq', 'GRCh38-rna'}
     self.assertSetEqual(set(citeseq.list_data()), expected_modalities,
                         "Some modality is missing!")

     column_checks = (
         ('demux_type', "Demux type is lost!"),
         ('assignment', "Cell assignment is lost!"),
     )
     for column, message in column_checks:
         self.assertIn(column, citeseq.obs.columns, message)

     # The shape is checked once before and once after switching modality.
     self.assertEqual(citeseq.shape, (737280, 33694),
                      "RNA data shape differs!")
     citeseq.select_data('GRCh38-citeseq')
     self.assertEqual(citeseq.shape, (578353, 31),
                      "CITE-Seq data shape differs!")

示例#10

0

显示文件

def test_mantonbm():
    """Compare harmony-pytorch, harmonypy and R Harmony embeddings on MantonBM.

    Embeddings already saved under ``./result`` are loaded; missing ones are
    computed from the source data, timed, and saved. The three embeddings are
    passed to ``check_metric`` (norms ``'r'`` and ``'L2'``) and, when fewer
    than four matching PDFs exist under ``./plots``, to ``plot_umap``.
    """
    print("Testing on MantonBM dataset...")

    result_path = "./result/MantonBM_result.h5ad"
    torch_path = "./result/MantonBM_torch_z.npy"
    py_path = "./result/MantonBM_py_z.npy"

    z_files = [f for f in os.listdir("./result") if re.match("MantonBM.*_z.(txt|npy)", f)]
    # The file count alone cannot tell whether the two embeddings used here
    # exist (the pattern also matches other "*_z" files), so each path is
    # checked explicitly. Otherwise the branches below could reach an
    # unbound `adata`.
    needs_source = (len(z_files) < 3
                    or not os.path.exists(result_path)
                    or not os.path.exists(torch_path)
                    or not os.path.exists(py_path))
    adata = None
    if needs_source:
        adata = pg.read_input("./data/MantonBM/original_data.h5ad")
        adata.obs['Individual'] = pd.Categorical(adata.obs['Channel'].apply(lambda s: s.split('_')[0][-1]))

    if os.path.exists(torch_path):
        Z_torch = np.load(torch_path)
        print("Precalculated embedding by harmony-pytorch is loaded.")
    else:
        start_torch = time.time()
        Z_torch = harmonize(adata.obsm['X_pca'], adata.obs, batch_key = 'Channel')
        end_torch = time.time()

        print("Time spent for harmony-pytorch = {:.2f}s.".format(end_torch - start_torch))
        np.save(torch_path, Z_torch)

    if os.path.exists(py_path):
        Z_py = np.load(py_path)
        print("Precalculated embedding by harmonypy is loaded.")
    else:
        start_py = time.time()
        ho = run_harmony(adata.obsm['X_pca'], adata.obs, ['Channel'])
        end_py = time.time()

        print("Time spent for harmonypy = {:.2f}s.".format(end_py - start_py))

        Z_py = np.transpose(ho.Z_corr)
        np.save(py_path, Z_py)

    Z_R = np.loadtxt("./result/MantonBM_harmony_z.txt")

    check_metric(Z_torch, Z_py, Z_R, prefix = "MantonBM", norm = 'r')
    check_metric(Z_torch, Z_py, Z_R, prefix = "MantonBM", norm = 'L2')

    # With a saved result present, plot_umap is called with adata=None.
    # NOTE(review): presumably plot_umap then loads the saved result itself
    # -- confirm against its definition.
    if os.path.exists(result_path):
        adata = None

    umap_list = [f for f in os.listdir("./plots") if re.match("MantonBM.*.pdf", f)]
    if len(umap_list) < 4:
        plot_umap(adata, Z_torch, Z_py, Z_R, prefix = "MantonBM", batch_key = "Individual")

示例#11

0

显示文件

文件： test_hashing_citeseq.py 项目： michael-alperovich/pegasus

 def test_clustering(self):
     """Check the clustered CITE-Seq result: modalities, obs, obsm, cell counts."""
     clustered = pg.read_input("tests/citeseq_result.zarr.zip")

     expected_modalities = {'GRCh38-citeseq', 'GRCh38-rna'}
     self.assertSetEqual(set(clustered.list_data()), expected_modalities,
                         "Some modality is missing!")

     # Row count of the modality selected right after loading.
     n_rna_cells = clustered.shape[0]

     self.assertNotIn('demux_type', clustered.obs.columns,
                      "Demux type is not removed!")
     n_assignment_categories = clustered.obs['assignment'].cat.categories.size
     self.assertEqual(n_assignment_categories, 7,
                      "Not all cells are demultiplexed singlets!")
     self.assertIn('X_citeseq', clustered.obsm.keys(),
                   "CITE-Seq coordinates are lost!")

     citeseq_umap_dims = clustered.obsm['X_citeseq_umap'].shape[1]
     rna_umap_dims = clustered.obsm['X_umap'].shape[1]
     self.assertEqual(citeseq_umap_dims, rna_umap_dims,
                      "Some of UMAP embeddings is lost!")

     clustered.select_data('GRCh38-citeseq')
     n_citeseq_cells = clustered.shape[0]
     self.assertEqual(n_rna_cells, n_citeseq_cells,
                      "Two modalities have inconsistent number of cells!")

示例#12

0

显示文件

def test_pbmc():
    """Compare harmony-pytorch, harmonypy and R Harmony embeddings on 10x pbmc.

    Embeddings already saved under ``./result`` are loaded; missing ones are
    computed from the source data, timed, and saved. The three embeddings are
    passed to ``check_metric`` (norms ``'r'`` and ``'L2'``) and, when fewer
    than four matching PDFs exist under ``./plots``, to ``plot_umap``.
    """
    print("Testing on 10x pbmc dataset...")

    result_path = "./result/pbmc_result.h5ad"
    torch_path = "./result/pbmc_torch_z.npy"
    py_path = "./result/pbmc_py_z.npy"

    z_files = [f for f in os.listdir("./result") if re.match("pbmc.*_z.(txt|npy)", f)]
    # The file count alone cannot tell whether the two embeddings used here
    # exist, so each path is checked explicitly. Otherwise the branches below
    # could reach an unbound `adata`.
    needs_source = (len(z_files) < 3
                    or not os.path.exists(result_path)
                    or not os.path.exists(torch_path)
                    or not os.path.exists(py_path))
    adata = None
    if needs_source:
        adata = pg.read_input("./data/10x_pbmc/original_data.h5ad")

    if os.path.exists(torch_path):
        Z_torch = np.load(torch_path)
        print("Precalculated embedding by harmony-pytorch is loaded.")
    else:
        start_torch = time.time()
        Z_torch = harmonize(adata.obsm['X_pca'], adata.obs, batch_key = 'Channel')
        end_torch = time.time()

        print("Time spent for harmony-pytorch = {:.2f}s.".format(end_torch - start_torch))
        np.save(torch_path, Z_torch)

    if os.path.exists(py_path):
        Z_py = np.load(py_path)
        print("Precalculated embedding by harmonypy is loaded.")
    else:
        start_py = time.time()
        ho = run_harmony(adata.obsm['X_pca'], adata.obs, ['Channel'])
        end_py = time.time()

        print(ho.objective_harmony)
        print("Time spent for harmonypy = {:.2f}s.".format(end_py - start_py))

        Z_py = np.transpose(ho.Z_corr)
        np.save(py_path, Z_py)

    Z_R = np.loadtxt("./result/pbmc_harmony_z.txt")

    check_metric(Z_torch, Z_py, Z_R, prefix = "pbmc", norm = 'r')
    check_metric(Z_torch, Z_py, Z_R, prefix = "pbmc", norm = 'L2')

    # With a saved result present, plot_umap is called with adata=None.
    # NOTE(review): presumably plot_umap then loads the saved result itself
    # -- confirm against its definition.
    if os.path.exists(result_path):
        adata = None

    umap_list = [f for f in os.listdir("./plots") if re.match("pbmc.*.pdf", f)]
    if len(umap_list) < 4:
        plot_umap(adata, Z_torch, Z_py, Z_R, prefix = "pbmc", batch_key = "Channel")

示例#13

0

显示文件

文件： test_gpu.py 项目： letaylor/harmony-pytorch

def test_mantonbm():
    """Compare CPU and GPU harmony-pytorch embeddings with R Harmony on MantonBM.

    Embeddings already saved under ``./result`` are loaded; missing ones are
    computed from the source data, timed, and saved. Each embedding is passed
    to ``check_metrics`` together with the R result and, when fewer than four
    matching PDFs exist under ``./plots``, everything goes to ``plot_umap``.
    """
    print("Testing on MantonBM...")

    result_path = "./result/MantonBM_result.h5ad"
    cpu_path = "./result/MantonBM_cpu_z.npy"
    gpu_path = "./result/MantonBM_gpu_z.npy"

    z_files = [f for f in os.listdir("./result") if re.match("MantonBM.*_z.(txt|npy)", f)]
    # Load the source data whenever it can be needed below: to compute a
    # missing embedding, or for plotting when no saved result exists. The
    # file count alone left `adata` unbound in both of those situations.
    needs_source = (len(z_files) < 3
                    or not os.path.exists(result_path)
                    or not os.path.exists(cpu_path)
                    or not os.path.exists(gpu_path))
    adata = None
    if needs_source:
        adata = pg.read_input("./data/MantonBM/original_data.h5ad")
        adata.obs['Individual'] = pd.Categorical(adata.obs['Channel'].apply(lambda s: s.split('_')[0][-1]))

    if os.path.exists(cpu_path):
        Z_cpu = np.load(cpu_path)
        print("Precalculated CPU mode result is loaded.")
    else:
        start_cpu = time.time()
        Z_cpu = harmonize(adata.obsm['X_pca'], adata.obs, 'Channel')
        end_cpu = time.time()

        print("Time spent in CPU mode = {:.2f}s.".format(end_cpu - start_cpu))
        np.save(cpu_path, Z_cpu)

    if os.path.exists(gpu_path):
        Z_gpu = np.load(gpu_path)
        print("Precalculated GPU mode result is loaded.")
    else:
        start_gpu = time.time()
        Z_gpu = harmonize(adata.obsm['X_pca'], adata.obs, 'Channel', use_gpu = True)
        end_gpu = time.time()

        print("Time spent in GPU mode = {:.2f}s".format(end_gpu - start_gpu))
        np.save(gpu_path, Z_gpu)

    Z_R = np.loadtxt("./result/MantonBM_harmony_z.txt")

    check_metrics(Z_cpu, Z_R, prefix = "MantonBM_cpu")
    check_metrics(Z_gpu, Z_R, prefix = "MantonBM_gpu")

    # With a saved result present, plot_umap is called with adata=None.
    # NOTE(review): presumably plot_umap then loads the saved result itself
    # -- confirm against its definition.
    if os.path.exists(result_path):
        adata = None

    umap_list = [f for f in os.listdir("./plots") if re.match("MantonBM.*.pdf", f)]
    if len(umap_list) < 4:
        plot_umap(adata, Z_cpu, Z_gpu, Z_R, prefix = "MantonBM", batch_key = 'Individual')

示例#14

0

显示文件

def test_cell_lines():
    """Compare harmony-pytorch, harmonypy and R Harmony on the cell lines data.

    Embeddings already saved under ``./result`` are loaded; missing ones are
    computed from the PCA matrix and metadata, timed, and saved. The three
    embeddings are passed to ``check_metric`` (norms ``'r'`` and ``'L2'``)
    and, when fewer than four matching PDFs exist under ``./plots``, to
    ``plot_umap``. If the result file exists afterwards, kBET statistics are
    printed for the ``harmony``, ``py`` and ``torch`` representations.
    """
    print("Testing on cell lines dataset...")

    result_path = "./result/cell_lines_result.h5ad"
    torch_path = "./result/cell_lines_torch_z.npy"
    py_path = "./result/cell_lines_py_z.npy"

    z_files = [f for f in os.listdir("./result") if re.match("cell_lines.*_z.(txt|npy)", f)]
    # The file count alone cannot tell whether the two embeddings used here
    # exist, so each path is checked explicitly. Otherwise the branches below
    # could reach an unbound `X` / `df_metadata`.
    needs_source = (len(z_files) < 3
                    or not os.path.exists(result_path)
                    or not os.path.exists(torch_path)
                    or not os.path.exists(py_path))
    if needs_source:
        X = np.loadtxt("./data/cell_lines/pca.txt")
        df_metadata = pd.read_csv("./data/cell_lines/metadata.csv")

    if os.path.exists(torch_path):
        Z_torch = np.load(torch_path)
        print("Precalculated embedding by harmony-pytorch is loaded.")
    else:
        start_torch = time.time()
        Z_torch = harmonize(X, df_metadata, batch_key = 'dataset')
        end_torch = time.time()

        print("Time spent for harmony-pytorch = {:.2f}s.".format(end_torch - start_torch))
        np.save(torch_path, Z_torch)

    if os.path.exists(py_path):
        Z_py = np.load(py_path)
        print("Precalculated embedding by harmonypy is loaded.")
    else:
        start_py = time.time()
        ho = run_harmony(X, df_metadata, ['dataset'])
        end_py = time.time()

        print("Time spent for harmonypy = {:.2f}s.".format(end_py - start_py))
        print(ho.objective_harmony)

        Z_py = np.transpose(ho.Z_corr)
        np.save(py_path, Z_py)

    Z_R = np.loadtxt("./result/cell_lines_harmony_z.txt")

    check_metric(Z_torch, Z_py, Z_R, prefix = "cell_lines", norm = 'r')
    check_metric(Z_torch, Z_py, Z_R, prefix = "cell_lines", norm = 'L2')

    if os.path.exists(result_path):
        adata = None
    else:
        # No saved result: wrap the PCA matrix in an AnnData with an empty
        # two-column sparse X so neighbors and UMAP can run on 'X_pca'.
        n_obs = X.shape[0]
        adata = AnnData(X = csr_matrix((n_obs, 2)), obs = df_metadata)
        adata.obsm['X_pca'] = X

        pg.neighbors(adata, rep = 'pca')
        pg.umap(adata)

    umap_list = [f for f in os.listdir("./plots") if re.match("cell_lines.*.pdf", f)]
    if len(umap_list) < 4:
        plot_umap(adata, Z_torch, Z_py, Z_R, prefix = "cell_lines", batch_key = "dataset")

    # Existence is re-checked here on purpose rather than cached: the result
    # file may only appear after the plotting step above.
    # NOTE(review): presumably plot_umap writes it -- confirm.
    if os.path.exists(result_path):
        adata = pg.read_input(result_path, h5ad_mode = 'r')

        kbet_runs = (
            ("Harmony", 'harmony'),
            ("harmonypy", 'py'),
            ("harmony-pytorch", 'torch'),
        )
        for label, rep in kbet_runs:
            stat, pvalue, ac_rate = pg.calc_kBET(adata, attr = 'dataset', rep = rep)
            print("kBET for {label}: statistic = {stat}, p-value = {pval}, ac rate = {ac_rate}".format(label = label, stat = stat, pval = pvalue, ac_rate = ac_rate))

示例#15

0

显示文件

 def test_mtx_v3_dir(self):
     """Reading the hgmm_1k_v3 directory must yield 1046 rows."""
     mtx_dir = "tests/pegasus-test-data/input/hgmm_1k_v3_filtered_feature_bc_matrix/"
     n_rows = pg.read_input(mtx_dir).shape[0]
     self.assertEqual(n_rows, 1046)

示例#16

0

显示文件

 def test_mtx_v2_dir(self):
     """Reading the hgmm_1k hg19 directory must yield 504 rows."""
     mtx_dir = "tests/pegasus-test-data/input/hgmm_1k_filtered_gene_bc_matrices/hg19/"
     n_rows = pg.read_input(mtx_dir).shape[0]
     self.assertEqual(n_rows, 504)

示例#17

0

显示文件

 def test_read_write_old_5ad(self):
     """Write the ``test_obsm_compound.h5ad`` fixture to ``test.h5ad`` and re-read it."""
     h5ad_path = "test.h5ad"
     original = pg.read_input(
         "tests/pegasus-test-data/input/test_obsm_compound.h5ad")
     pg.write_output(original, h5ad_path)
     reloaded = pg.read_input(h5ad_path)
     assert_adata_equal(self, original, reloaded)

示例#18

0

显示文件

if __name__ == "__main__":
    # Command-line entry point: overwrite obsm["X_pca"] of an h5ad file with
    # values read from a Harmony CSV and write the result to a new file.
    import argparse
    import pandas as pd
    import pegasus as pg

    parser = argparse.ArgumentParser(
        description='Update the X_pca with the results of harmony')
    for positional in ('h5ad_filename', 'harmony_csv', 'output'):
        parser.add_argument(positional, type=str)

    args = vars(parser.parse_args())

    harmony_table = pd.read_csv(args["harmony_csv"])
    # Transpose, then drop the first row, i.e. the first CSV column, which
    # holds the id of the PC.
    pca = harmony_table.values.T[1:]

    adata = pg.read_input(args["h5ad_filename"])
    adata.obsm["X_pca"] = pca
    pg.write_output(adata, args["output"])

示例#19

0

显示文件

def main():
    """Aggregate, QC, normalize, cluster and Harmony-correct a set of samples.

    Steps, all driven by the options returned by ``my_args()``:

    1. Run ``pegasus aggregate_matrix`` and load ``<output>.zarr.zip``.
    2. Compute QC metrics, save the filter statistics and three QC violin
       plots.
    3. Filter, identify robust genes, log-normalize, and pickle the
       normalized matrices (RNA, plus antibody when ``args.citeseq`` is set).
    4. Cluster and plot a copy without batch correction.
    5. Cluster and plot a copy with Harmony correction, write it to
       ``<output>_harmony.zarr`` and export the combined data and UMAP
       coordinates.

    Every output file name is prefixed with ``args.output``.
    """
    import subprocess

    args = my_args()

    out = args.output
    # Pass the arguments as a list so paths containing spaces or shell
    # metacharacters are handed over verbatim, and stop on failure instead
    # of going on to read a missing or stale zarr file.
    subprocess.run(
        ["pegasus", "aggregate_matrix", str(args.input_csv), str(out)],
        check=True)
    zarr_file = "%s.zarr.zip" % (out)

    data = pg.read_input(zarr_file)
    if args.citeseq:
        data.select_data("%s-rna" % (data.uns['genome']))
    pg.qc_metrics(data,
                  percent_mito=args.MT_percent,
                  mito_prefix=args.MT_prefix,
                  max_genes=args.max_genes)
    df_qc = pg.get_filter_stats(data)
    df_qc.to_csv("%s_qc_get_filter_stats.csv" % (out))

    pg.qcviolin(data, plot_type='gene')
    plt.savefig("%s_qcviolin_gene.pdf" % (out), bbox_inches='tight')

    pg.qcviolin(data, plot_type='count')
    plt.savefig("%s_qcviolin_UMI_count.pdf" % (out), bbox_inches='tight')

    pg.qcviolin(data, plot_type='mito')
    plt.savefig("%s_qcviolin_UMI_mito.pdf" % (out), bbox_inches='tight')

    # filtering
    pg.filter_data(data)
    pg.identify_robust_genes(data, percent_cells=0.05)
    pg.log_norm(data)

    print(data.obs['Channel'].value_counts())
    # save log norm data, rna
    df = pd.DataFrame.sparse.from_spmatrix(data.X)
    df.index = data.obs.index.tolist()
    df.columns = data.var.index.tolist()
    df.to_pickle("%s.rna.log_norm.pkl" % (out))

    if args.citeseq:
        # Export the antibody modality, then switch back to RNA for the
        # remaining steps.
        data.select_data("%s-citeseq" % (data.uns['genome']))
        df = pd.DataFrame.sparse.from_spmatrix(data.X)
        df.index = data.obs.index.tolist()
        df.columns = data.var.index.tolist()
        df.to_pickle("%s.antibody.log_norm.pkl" % (out))
        data.select_data("%s-rna" % (data.uns['genome']))

    # without batch correction
    data_baseline = data.copy()
    pg.highly_variable_features(data_baseline,
                                consider_batch=False,
                                n_top=4000)

    pg.hvfplot(data_baseline)
    plt.savefig("%s_hvfplot_noBC.pdf" % (out), bbox_inches='tight')

    pg.pca(data_baseline, n_components=200)
    pg.neighbors(data_baseline, K=200)
    pg.louvain(data_baseline, resolution=2)
    pg.umap(data_baseline, n_neighbors=10, min_dist=0.4)
    pg.scatter(data_baseline,
               attrs=['louvain_labels', 'Channel'],
               basis='umap')
    plt.savefig("%s_without_BC.pdf" % (out), bbox_inches='tight')

    # with batch correction
    pg.highly_variable_features(data, consider_batch=True, n_top=4000)

    pg.hvfplot(data)
    # Saved under its own name: this plot previously reused the "_noBC" file
    # name and overwrote the uncorrected plot saved above.
    plt.savefig("%s_hvfplot_BC.pdf" % (out), bbox_inches='tight')

    data_harmony = data.copy()
    pg.pca(data_harmony, n_components=200)
    harmony_key = pg.run_harmony(data_harmony)
    pg.neighbors(data_harmony, rep=harmony_key, K=200)
    pg.louvain(data_harmony, rep=harmony_key, resolution=2)
    pg.umap(data_harmony, rep=harmony_key, n_neighbors=10, min_dist=0.4)
    pg.scatter(data_harmony, attrs=['louvain_labels', 'Channel'], basis='umap')
    plt.savefig("%s_Harmony_BC.pdf" % (out), bbox_inches='tight')
    pg.write_output(data_harmony, "%s_harmony.zarr" % (out))

    ddf = pd.DataFrame.sparse.from_spmatrix(data_harmony.X)
    ddf.index = data_harmony.obs.index.tolist()
    ddf.columns = data_harmony.var.index.tolist()
    # NOTE(review): the citeseq modality is selected here even when
    # args.citeseq is false -- confirm this is intended for RNA-only runs.
    data_harmony.select_data("%s-citeseq" % (data_harmony.uns['genome']))
    ddf2 = pd.DataFrame.sparse.from_spmatrix(data_harmony.X)
    ddf2.index = data_harmony.obs.index.tolist()
    ddf2.columns = data_harmony.var.index.tolist()
    df_all = pd.concat([ddf, ddf2], axis=1)
    df_all = df_all.sparse.to_dense()
    df_all = df_all.round(3)
    df_all.to_csv("%s.Harmony_correction.data.csv" % (out))

    # original harmony UMAP data (kept in its own variable so the output
    # prefix `out` is not shadowed by a data frame)
    umap_df = data_harmony.obs.copy()
    umap_df['UMAP1'] = data_harmony.obsm['X_umap'][:, 0]
    umap_df['UMAP2'] = data_harmony.obsm['X_umap'][:, 1]
    from anndata import AnnData
    ann = AnnData(X=umap_df[['UMAP1', 'UMAP2']],
                  obs=umap_df[['Channel', 'louvain_labels']])
    import scanpy as sc
    sc.pl.scatter(ann,
                  x="UMAP1",
                  y="UMAP2",
                  color='louvain_labels',
                  legend_loc='on data',
                  legend_fontsize=12,
                  legend_fontoutline=2,
                  frameon=False,
                  title='clustering of cells')
    plt.savefig("%s_Scapy_UMAP.png" % (out), bbox_inches='tight')
    umap_df.to_csv("%s_Harmony_UMAP.csv" % (out))

示例#20

0

显示文件

文件： pegasus.py 项目： ohsu-cedar-comp-hub/AML_stroma

import os
import pegasus as pg



#RNA_markers = snakemake.input.RNA
#integrated_markers = snakemake.input.integrated

####################
# GLOBAL VARIABLES #
####################
# NOTE(review): get_args is neither defined nor imported in the visible part
# of this file -- confirm where it comes from.
args = get_args()

# Read at import time from a path relative to the current working directory.
adata = pg.read_input("MantonBM_nonmix_subset.h5sc")
#directory = snakemake.params.out_dir 

############
# FUNCTION #
############



###############
#     MAIN    #
###############

def main():
    """Entry point placeholder; no pipeline steps are implemented yet."""
    # The original definition had no body at all, which made the whole file
    # fail to parse. The docstring alone is a valid (no-op) body.


if __name__ == "__main__":
    main()

示例#21

0

显示文件

 def __init__(self, *args, **kwargs):
     """Initialise the test case and load the two result files it inspects."""
     super(TestPipeline, self).__init__(*args, **kwargs)
     fixtures = (
         ("aggr_data", "tests/aggr.zarr.zip"),
         ("data", "tests/result.zarr.zip"),
     )
     # Each fixture is read once and stored under the given attribute name.
     for attribute, path in fixtures:
         setattr(self, attribute, pg.read_input(path))