示例#1
0
def integrate_spatial_rna(adata_spatial, adata_rna, type='anterior'):
    """Integrate spatial and scRNA-seq data with DAVAE and transfer labels.

    The two datasets are preprocessed together, embedded with
    ``davae.fit_integration`` and shown as a UMAP coloured by batch. The
    joint embedding is split back into its spatial and RNA parts, the
    pairwise cosine similarity between RNA cells and spatial observations
    is passed to ``label_transfer`` together with the RNA ``cell_subclass``
    annotation, and the best-scoring subclass of every spatial observation
    is stored in ``adata_spatial.obs['celltype']`` before the object is
    written to disk.

    Args:
        adata_spatial: Spatial dataset. Modified in place: gains
            ``obsm['davae_embedding']`` and ``obs['celltype']``.
        adata_rna: scRNA-seq dataset providing ``obs.cell_subclass``.
            Modified in place: gains ``obsm['davae_embedding']``.
        type: Tag inserted into the output file name. The name shadows the
            builtin but is kept so existing keyword callers keep working.

    Returns:
        None. Results are plotted, printed and written to
        ``/Users/zhongyuanke/data/dann_vae/spatial/<type>_label_02.h5ad``.
    """
    from collections import Counter

    adata_all = tl.spatial_rna_preprocessing(adata_spatial,
                                             adata_rna,
                                             n_top_genes=10000)
    adata_integrate = davae.fit_integration(
        adata_all,
        epochs=100,
        batch_size=2,
        domain_lambda=5,
        sparse=True,
        hidden_layers=[128, 64, 32],
        split_by='batch',
    )
    sc.pp.neighbors(adata_integrate, use_rep='X_davae')
    sc.tl.umap(adata_integrate)
    sc.pl.umap(adata_integrate, color='batch')

    # Split the joint embedding back into its two sources.
    # NOTE(review): assumes the integrated rows are ordered spatial first,
    # then RNA -- confirm against tl.spatial_rna_preprocessing.
    len_spatial = adata_spatial.shape[0]
    len_rna = adata_rna.shape[0]
    davae_emb = adata_integrate.obsm['X_davae']
    adata_spatial.obsm["davae_embedding"] = davae_emb[0:len_spatial, :]
    adata_rna.obsm['davae_embedding'] = davae_emb[len_spatial:len_rna +
                                                  len_spatial, :]

    # 1 - cosine distance, i.e. a similarity score (higher means closer).
    similarity = 1 - cosine_distances(adata_rna.obsm["davae_embedding"],
                                      adata_spatial.obsm['davae_embedding'])
    class_prob_anterior = label_transfer(similarity,
                                         adata_rna.obs.cell_subclass)
    # One column per RNA subclass, in sorted order.
    # NOTE(review): presumes label_transfer emits its columns in that same
    # sorted order -- confirm.
    cp_spatial_df = pd.DataFrame(class_prob_anterior,
                                 columns=np.sort(
                                     adata_rna.obs.cell_subclass.unique()))
    label = cp_spatial_df.idxmax(axis='columns').values
    cp_spatial_df.index = adata_spatial.obs.index

    # Plot on a copy so the per-class score columns do not end up in the
    # object that is written to disk below.
    adata_transfer = adata_spatial.copy()
    adata_transfer.obs = pd.concat([adata_spatial.obs, cp_spatial_df], axis=1)
    sc.pl.spatial(
        adata_transfer,
        img_key="hires",
        # color=["L2/3 IT", "L4", "L5 PT", "L6 CT"],
        # Fixed: the keyword was misspelled ``colot``, so no colour key was
        # passed and the stray kwarg was forwarded to the plotting backend.
        color=['Hpca'],
        size=1.5,
        color_map='Blues',
        ncols=2,
        legend_fontsize='xx-small')

    adata_spatial.obs['celltype'] = label
    # sc.pl.spatial(
    #     adata_transfer,
    #     img_key="hires",
    #     color='celltype',
    #     size=1.5,
    #     color_map='Set2'
    # )
    # Report how many spatial observations were assigned to each subclass.
    print(Counter(list(label)))
    adata_spatial.write_h5ad('/Users/zhongyuanke/data/dann_vae/spatial/' +
                             type + '_label_02.h5ad')
示例#2
0
def deep_label_transfer(adata_spatial, adata_rna, type='anterior'):
    """Transfer RNA cell-subclass labels to spatial data with a classifier.

    The two datasets are preprocessed together and embedded with
    ``davae.fit_integration``. A ``dc.CLASSIFIER`` network is trained on
    the RNA part of the embedding against the one-hot encoded
    ``cell_subclass`` labels, then used to predict a label for every
    spatial observation. Predictions are stored in
    ``adata_spatial.obs['celltype']`` and the object is written to disk.

    Args:
        adata_spatial: Spatial dataset; gains ``obs['celltype']`` in place.
        adata_rna: scRNA-seq dataset providing ``obs.cell_subclass``.
        type: Tag inserted into the output file name. The name shadows the
            builtin but is kept so existing keyword callers keep working.

    Returns:
        None. Results are printed, plotted and written to
        ``base_path + 'dann_vae/spatial/' + type + '_label_02.h5ad'``.
    """
    adata_all = tl.spatial_rna_preprocessing(adata_spatial, adata_rna)
    adata_integrate = davae.fit_integration(
        adata_all,
        epochs=45,
        hidden_layers=[128, 64, 32, 5],
        sparse=True,
        domain_lambda=3.0,
    )
    sc.pp.neighbors(adata_integrate, use_rep='X_davae')
    sc.tl.umap(adata_integrate)
    sc.pl.umap(adata_integrate, color='batch')

    rna_celltype = adata_rna.obs.cell_subclass
    print(rna_celltype)
    encoder = LabelEncoder()
    orig_label = encoder.fit_transform(rna_celltype)
    print(orig_label)
    # Fixed: assigning to ``.dtype`` reinterprets the underlying buffer
    # instead of converting values (it corrupts the data whenever the
    # source dtype is not already int64). ``astype`` converts properly.
    orig_label = orig_label.astype('int64')

    # NOTE(review): assumes the integrated rows are ordered spatial first,
    # then RNA -- confirm against tl.spatial_rna_preprocessing.
    davae_emb = adata_integrate.obsm['X_davae']
    len_spatial = adata_spatial.shape[0]
    len_rna = adata_rna.shape[0]
    test_set = davae_emb[0:len_spatial]
    train_set = davae_emb[len_spatial:len_spatial + len_rna]

    # One-hot targets; the number of columns is the number of classes.
    label = to_categorical(orig_label)
    print(label)
    class_num = label.shape[1]

    net_x = dc.CLASSIFIER(input_size=train_set.shape[1], class_num=class_num)
    net_x.build()
    net_x.compile()
    net_x.train(x=train_set, label=label, epochs=25, batch_size=128)
    pred_label = net_x.prediction(test_set)
    # Same fix as above: convert values rather than reinterpreting bytes.
    # NOTE(review): presumes prediction() returns class indices -- confirm.
    pred_label = pred_label.astype('int64')
    pred_type = encoder.inverse_transform(pred_label)

    # df = pd.DataFrame(pred_type)
    # df.to_csv('/Users/zhongyuanke/data/dann_vae/atac/pred_type_save03.csv')
    # np.savetxt('/Users/zhongyuanke/data/dann_vae/atac/pred_label_save03.csv', pred_label, delimiter=',')
    #
    # all_label = np.concatenate([pred_label, orig_label])
    # all_type = encoder.inverse_transform(all_label)

    print(pred_type)
    # Report how many spatial observations were assigned to each subclass.
    type_list = list(pred_type)
    print(Counter(type_list))

    adata_spatial.obs['celltype'] = pred_type
    # adata_davae.obs['cell type'] = all_type
    adata_spatial.write_h5ad(base_path + 'dann_vae/spatial/'+type+'_label_02.h5ad')
示例#3
0
                    help="base path")
# Number of training epochs forwarded to davae.fit_integration below.
parser.add_argument("--epoch", type=int, default=15, help="epochs")
opt = parser.parse_args()

base_path = opt.base_path
epoch = opt.epoch
# Wall-clock duration (seconds) of each fit_integration run.
time_list = []
# First run: the "5w" blood and bone files.
# NOTE(review): presumably "5w" means 50,000 cells per file, matching the
# "10w cells" message below -- confirm.
adata1 = sc.read_h5ad(base_path + 'blood_5w.h5ad')
adata2 = sc.read_h5ad(base_path + 'bone_5w.h5ad')
print(adata1)
print(adata2)
adata_all = tl.davae_preprocessing([adata1, adata2])
# Only the fit_integration call is timed; loading and preprocessing are not.
t0 = time.time()
adata_out = davae.fit_integration(adata_all,
                                  batch_size=256,
                                  epochs=epoch,
                                  sparse=True)
t1 = time.time()
print("Total time running DAVAE 10w cells: %s seconds" % (str(t1 - t0)))
time_list.append(t1 - t0)
# Memory and CPU report. The printed labels read, in order: memory used by
# this process (RSS, GB), total system memory (GB), system memory usage (%)
# and number of CPUs.
info = psutil.virtual_memory()
print('内存使用:',
      psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024 / 1024, 'GB')
print('总内存:', info.total / 1024 / 1024 / 1024, 'GB')
print('内存占比:', info.percent)
print('cpu个数:', psutil.cpu_count())

# Load the next, larger pair of files ("10w") for the following run.
adata1 = sc.read_h5ad(base_path + 'blood_10w.h5ad')
adata2 = sc.read_h5ad(base_path + 'bone_10w.h5ad')
print(adata1)
print(adata2)
示例#4
0
# sc.pp.highly_variable_genes(adata2, n_top_genes=6000)
# adata1 = adata1[:, adata1.var.highly_variable]
# adata2 = adata2[:, adata2.var.highly_variable]
#
# adata1.write_h5ad(file1_p)
# adata2.write_h5ad(file2_p)
# del adata1.var['highly_variable']
# del adata2.var['highly_variable']
# del adata1.var['means']
# del adata2.var['means']
# del adata1.var['dispersions']
# del adata2.var['dispersions']
# del adata1.var['dispersions_norm']
# del adata2.var['dispersions_norm']
# Integrate two datasets with DAVAE, plot a UMAP of the result and save it.
# adata1, adata2, base_path and out_path are defined earlier in the script.
print(adata1)
print(adata2)
adata_all = tl.davae_preprocessing([adata1, adata2], n_top_genes=4000)
adata_integrate = davae.fit_integration(
    adata_all,
    epochs=25,
    hidden_layers=[128, 64, 32, 5],
    sparse=True,
    domain_lambda=0.5,
)
# import umap
# adata_integrate.obsm['X_umap']=umap.UMAP().fit_transform(adata_integrate.obsm['X_davae'])
# Build the neighbour graph on the DAVAE embedding rather than on X.
sc.pp.neighbors(adata_integrate, use_rep='X_davae', n_neighbors=8)
sc.tl.umap(adata_integrate)
sc.pl.umap(adata_integrate, color=['_batch'], s=3)
adata_integrate.write_h5ad(base_path + out_path)
示例#5
0
import scbean.model.davae as davae
import scbean.tools.utils as tl
import scanpy as sc
import matplotlib
from numpy.random import seed
# Seed NumPy's global RNG and select the Tk matplotlib backend before any
# plotting happens.
seed(2021)
matplotlib.use('TkAgg')

# Read the ifnb file and split it into one object per value of "stim".
adata = tl.read_sc_data(
    '/Users/zhongyuanke/data/seurat_data/ifnb/ifnb.h5ad'
)
datasets = tl.split_object(adata, by="stim")
for position in (0, 1):
    print(datasets[position])

# Joint preprocessing followed by DAVAE integration.
adata_all = tl.davae_preprocessing(datasets, n_top_genes=8000)
integration_options = dict(
    epochs=30,
    hidden_layers=[128, 64, 32, 5],
    domain_lambda=3.0,
)
adata_intagrate = davae.fit_integration(adata_all, **integration_options)
print(adata_intagrate)

# Cluster and visualise on the DAVAE embedding.
sc.pp.neighbors(adata_intagrate, n_neighbors=15, use_rep='X_davae')
sc.tl.louvain(adata_intagrate)
sc.tl.umap(adata_intagrate)
sc.pl.umap(adata_intagrate, cmap='tab20c', color='louvain')
示例#6
0
# Integrate the two benchmark1 "dc" batches with DAVAE and plot a UMAP
# coloured by batch and by the label taken from the original dataset.
matplotlib.use('TkAgg')

# NOTE(review): `epochs` is not referenced in the visible code;
# fit_integration below is passed epochs=1000 instead -- confirm intent.
epochs = 40
base_path = '/Users/zhongyuanke/data/'

file1 = base_path + 'dann_vae/benchmark1/dc_batch1.h5ad'
file2 = base_path + 'dann_vae/benchmark1/dc_batch2.h5ad'
orig_path = base_path + 'dann_vae/benchmark1/orig.h5ad'
# -------------train---------------------
adata1 = tl.read_sc_data(file1, fmt='h5ad')
adata2 = tl.read_sc_data(file2, fmt='h5ad')
adata_orig = tl.read_sc_data(orig_path, fmt='h5ad')
# orig_label =adata_orig.obs['label']
print(adata1)
print(adata2)
adata_all = tl.davae_preprocessing([adata1, adata2],
                                   n_top_genes=4000,
                                   sparse=False)
adata_integrate = davae.fit_integration(adata_all,
                                        split_by='batch',
                                        epochs=1000,
                                        hidden_layers=[128, 64, 32, 2],
                                        sparse=False,
                                        domain_lambda=6)
# pandas aligns this assignment on the obs index.
# NOTE(review): assumes adata_integrate and adata_orig share observation
# names; mismatched names would produce NaN labels -- confirm.
adata_integrate.obs['label'] = adata_orig.obs['label']
sc.pp.neighbors(adata_integrate, use_rep='X_davae')
sc.tl.umap(adata_integrate)
sc.pl.umap(adata_integrate, color=['batch', 'label'], s=10, cmap='Dark2')
# print(adata_integrate)
# adata_integrate.write_h5ad(base_path+'dann_vae/benchmark1/dc_davae_temp.h5ad')
示例#7
0
                    type=str,
                    default='/Users/zhongyuanke/data/',
                    help="base path")
# NOTE(review): --epoch is parsed but not read in the visible code;
# fit_integration below is passed epochs=1 -- confirm intent.
parser.add_argument("--epoch", type=int, default=10, help="epoch of training")

opt = parser.parse_args()

base_path = opt.base_path
# NOTE(review): out_path is not used in the visible code; presumably a
# later write_h5ad call consumes it -- confirm.
out_path = 'dann_vae/hca/davae_01.h5ad'
file1 = base_path + 'HCA/ica_cord_blood_h5.h5'
file2 = base_path + 'HCA/ica_bone_marrow_h5.h5'
adata1 = tl.read_sc_data(file1, fmt='10x_h5')
adata2 = tl.read_sc_data(file2, fmt='10x_h5')
# De-duplicate variable names in both objects before they are combined.
adata1.var_names_make_unique()
adata2.var_names_make_unique()
print(adata1)

# Preprocess with the hvg and lognorm options switched off.
adata_all = tl.davae_preprocessing([adata1, adata2], hvg=False, lognorm=False)
adata_integrate = davae.fit_integration(
    adata_all,
    split_by='batch',
    domain_lambda=5,
    epochs=1,
    hidden_layers=[128, 64, 32, 5],
    sparse=True,
)
# Neighbour graph and UMAP are computed on the DAVAE embedding.
sc.pp.neighbors(adata_integrate, use_rep='X_davae')
sc.tl.umap(adata_integrate)
sc.pl.umap(adata_integrate, color='batch')
print(adata_integrate)
示例#8
0
# Integrate the three mixed-cell-line batches (293t, jurkat, mixed) with
# DAVAE and plot a UMAP of the embedding.
matplotlib.use('TkAgg')

base_path = "/Users/zhongyuanke/data/vipcca/mixed_cell_lines/"

adata_b1 = tl.read_sc_data(base_path + "293t.h5ad", batch_name="293t")
adata_b2 = tl.read_sc_data(base_path + "jurkat.h5ad", batch_name="jurkat")
adata_b3 = tl.read_sc_data(base_path + "mixed.h5ad", batch_name="mixed")
# Save a plain concatenation of the three batches. This combined object is
# not used again in the visible code.
adata = adata_b1.concatenate(adata_b2)
adata = adata.concatenate(adata_b3)
adata.write_h5ad('/Users/zhongyuanke/data/pbmc/zheng/mcl.h5ad')

adata_all = tl.davae_preprocessing([adata_b1, adata_b2, adata_b3],
                                   n_top_genes=3000)
# NOTE(review): adata_all is printed twice in a row; looks redundant.
print(adata_all)
print(adata_all)
adata_integrate = davae.fit_integration(adata_all,
                                        batch_num=3,
                                        split_by='batch_label',
                                        domain_lambda=3.0,
                                        epochs=25,
                                        sparse=True,
                                        hidden_layers=[128, 64, 32, 5])
# sc.pp.neighbors(adata_integrate, use_rep='X_davae', n_neighbors=10)
# sc.tl.umap(adata_integrate)
# UMAP coordinates are computed with umap-learn directly on the DAVAE
# embedding and stored under 'X_umap', the key sc.pl.umap reads.
import umap
adata_integrate.obsm['X_umap'] = umap.UMAP().fit_transform(
    adata_integrate.obsm['X_davae'])
sc.pl.umap(adata_integrate, color=['_batch', 'celltype'], s=3)

# adata_integrate.write_h5ad('/Users/zhongyuanke/data/dann_vae/pbmc/davae_save02.h5ad')
示例#9
0
# loss_weight = np.array(loss_weight)
# print(adata1.shape)
# print(adata2.shape)
# print(orig_data.shape)

# data, batches, batch_label, loss_weight = shuffle(orig_data, orig_batches, orig_batch_label, loss_weight,
#                                             random_state=0)

# net_x = DACVAE(input_size=data.shape[1], batches=2, latent_size=10)
# net_x.build()
# net_x.compile()
# his = net_x.train(data, batches, loss_weight, epochs=epochs, batch_size=batch_size)
#
# mid = net_x.embedding(orig_data, orig_batches)
# adata_mid = anndata.AnnData(X=mid)
# adata_mid.obs['batch'] = orig_batch_label
# adata.obsm['davae'] = mid
# Integrate the datasets in adata_list (built earlier in the script) using
# fit_integration with mode='DACVAE', then plot a UMAP coloured by celltype.
adata_all = tl.davae_preprocessing(adata_list, sparse=False)
adata_out = davae.fit_integration(adata_all,
                                  split_by='batch',
                                  mode='DACVAE',
                                  domain_lambda=5.0,
                                  epochs=3,
                                  hidden_layers=[128, 64, 32, 5],
                                  sparse=False)
# Neighbour graph and UMAP are computed on the 'X_davae' embedding.
sc.pp.neighbors(adata_out, use_rep='X_davae')
sc.tl.umap(adata_out)
sc.pl.umap(adata_out, color='celltype')
print(adata_out)
# adata.write_h5ad(base_path+out_path)
示例#10
0
import scbean.model.davae as davae
import scbean.tools.utils as tl
import scanpy as sc
import matplotlib
from numpy.random import seed
import umap

# Seed NumPy's global RNG and select the Tk matplotlib backend.
seed(2021)
matplotlib.use('TkAgg')

r1 = "./data/mixed_cell_lines/mixed.h5ad"
r2 = "./data/mixed_cell_lines/293t.h5ad"
r3 = "./data/mixed_cell_lines/jurkat.h5ad"

# Read the three inputs in order, tagging each with its batch name.
adata_b1, adata_b2, adata_b3 = [
    tl.read_sc_data(h5ad_path, batch_name=batch_tag)
    for h5ad_path, batch_tag in ((r1, 'mix'), (r2, '293t'), (r3, 'jurkat'))
]

# Joint preprocessing followed by DAVAE integration of the three batches.
adata_all = tl.davae_preprocessing(
    [adata_b1, adata_b2, adata_b3],
    n_top_genes=2000,
)
adata_integrate = davae.fit_integration(
    adata_all,
    batch_num=3,
    domain_lambda=3.0,
    epochs=25,
    sparse=True,
    hidden_layers=[64, 32, 6],
)

# Compute UMAP coordinates with umap-learn on the DAVAE embedding and store
# them under 'X_umap' for sc.pl.umap.
reducer = umap.UMAP()
adata_integrate.obsm['X_umap'] = reducer.fit_transform(
    adata_integrate.obsm['X_davae']
)
sc.pl.umap(adata_integrate, s=3, color=['_batch', 'celltype'])
示例#11
0
# sc.pp.log1p(adata2)
# sc.pp.scale(adata2)
# adata2.obs['celltype'] = adata1.obs['celltype']

# adata2.write_h5ad(base_path + 'multimodal/atac_pbmc_10k/activaty_matrix_label.h5ad')
# Integrate adata1 and adata2 (prepared earlier in the script) with DAVAE,
# compute a UMAP and save the integrated object.
# hvg and lognorm are switched off in preprocessing.
# NOTE(review): n_top_genes=2000 is still passed although hvg=False;
# presumably it is ignored in that case -- confirm.
adata_all = tl.davae_preprocessing([adata1, adata2],
                                   n_top_genes=2000,
                                   hvg=False,
                                   lognorm=False)
# sc.pp.scale(adata_all)
print(adata_all)
adata_integrate = davae.fit_integration(
    adata_all,
    # mode='DAVAE',
    batch_num=2,
    split_by='batch_label',
    domain_lambda=6.0,
    epochs=60,
    sparse=True,
    hidden_layers=[128, 64, 32, 16, 8])
print(adata_integrate)
# adata_integrate.obs['celltyp']=adata
# NOTE(review): the umap module imported here is not referenced in the
# visible code; the UMAP below comes from scanpy (sc.tl.umap).
import umap
sc.pp.neighbors(adata_integrate, use_rep='X_davae')
sc.tl.umap(adata_integrate)
# adata_integrate.obs['celltype']
adata_integrate.write_h5ad(
    '/Users/zhongyuanke/data/dann_vae/multimodal/davae_multi_temp.h5ad')
#
#
# label=tool.get_label_by_txt(seurat_celltype_path)
示例#12
0
#     batch_categories=[
#         k
#         for d in [
#             adata_spatial_anterior.uns["spatial"],
#             adata_spatial_posterior.uns["spatial"],
#         ]
#         for k, v in d.items()
#     ],
# )
# Embed the combined spatial object with DAVAE, then cluster and plot it.
# adata_spatial is built earlier in the script.
print(adata_spatial)

import scbean.model.davae as davae
# NOTE(review): split_by='loss_weight' differs from the 'batch' /
# 'batch_label' keys used in similar calls -- confirm it is intended.
adata_integrate = davae.fit_integration(
    adata_spatial,
    epochs=25,
    split_by='loss_weight',
    hidden_layers=[128, 64, 32, 5],
    sparse=True,
    domain_lambda=0.5,
)
print(adata_spatial)
# embedding_adata = sc.read_h5ad(base_path+'dann_vae/spatial/davae_save02.h5ad')
# Copy the embedding onto the original object so the neighbour graph, UMAP
# and Leiden clustering below run on adata_spatial itself.
# NOTE(review): assumes adata_integrate keeps the row order of
# adata_spatial -- confirm.
adata_spatial.obsm["X_davae"] = adata_integrate.obsm['X_davae']
sc.pp.neighbors(adata_spatial, use_rep='X_davae', n_neighbors=10)
sc.tl.umap(adata_spatial)
# Leiden cluster labels are stored in obs['clusters'].
sc.tl.leiden(adata_spatial, key_added="clusters")
sc.pl.umap(adata_spatial,
           color=["library_id", "clusters"],
           size=4,
           color_map='Set2',
           frameon=False)
# sc.pl.umap(
示例#13
0
import scanpy as sc
import argparse
import scbean.model.davae as davae
import scbean.tools.utils as tl

# Command-line options: data directory, dataset size tag and epoch count.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--base_path",
    type=str,
    default='/Users/zhongyuanke/data/hca/',
    help="base path",
)
parser.add_argument("--type", type=str, default='5w', help="cell counts")
parser.add_argument("--epoch", type=int, default=2, help="epochs")
opt = parser.parse_args()

# Input files are named blood_<type>.h5ad and bone_<type>.h5ad.
base_path = opt.base_path
file1 = f'{base_path}blood_{opt.type}.h5ad'
file2 = f'{base_path}bone_{opt.type}.h5ad'

adata1 = sc.read_h5ad(file1)
adata2 = sc.read_h5ad(file2)
for loaded in (adata1, adata2):
    print(loaded)

# Preprocess with lognorm and hvg switched off, then integrate both batches.
adata_all = tl.davae_preprocessing([adata1, adata2], hvg=False, lognorm=False)
adata_integrate = davae.fit_integration(
    adata_all,
    batch_num=2,
    split_by='batch_label',
    domain_lambda=2.0,
    epochs=opt.epoch,
    sparse=True,
    hidden_layers=[64, 32, 6],
)