# Benchmark script: time a DAVAE integration of two HCA subsets and report
# the memory / CPU figures of the current process afterwards.
parser.add_argument(
    "--base_path",
    type=str,
    default='/Users/zhongyuanke/data/hca/',
    help="base path",
)
parser.add_argument("--epoch", type=int, default=15, help="epochs")
opt = parser.parse_args()
base_path = opt.base_path
epoch = opt.epoch

# Elapsed fit times in seconds. This initialisation used to be commented
# out, which made the `time_list.append(...)` below fail with NameError
# only after the (long) training run had already finished.
time_list = []

adata1 = sc.read_h5ad(base_path + 'blood_5w.h5ad')
adata2 = sc.read_h5ad(base_path + 'bone_5w.h5ad')
print(adata1)
print(adata2)

adata_all = tl.davae_preprocessing([adata1, adata2])

# Only the fit itself is timed; loading and preprocessing are excluded.
t0 = time.time()
adata_out = davae.fit_integration(adata_all, batch_size=256, epochs=epoch, sparse=True)
t1 = time.time()
elapsed = t1 - t0
print(f"Total time running DAVAE 10w cells: {elapsed} seconds")
time_list.append(elapsed)

# Resource report. The labels are runtime output and are kept verbatim:
# memory used by this process (GB), total system memory (GB),
# system memory usage (percent), number of CPUs.
info = psutil.virtual_memory()
print('内存使用:', psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024 / 1024, 'GB')
print('总内存:', info.total / 1024 / 1024 / 1024, 'GB')
print('内存占比:', info.percent)
print('cpu个数:', psutil.cpu_count())
# Integrate the two `stim` groups of the ifnb dataset with DAVAE, then
# cluster the integrated embedding and plot it.
import scbean.model.davae as davae
import scbean.tools.utils as tl
import scanpy as sc
import matplotlib
from numpy.random import seed

seed(2021)  # fix NumPy's global RNG seed
matplotlib.use('TkAgg')

IFNB_PATH = '/Users/zhongyuanke/data/seurat_data/ifnb/ifnb.h5ad'

adata = tl.read_sc_data(IFNB_PATH)
datasets = tl.split_object(adata, by="stim")
for idx in (0, 1):
    print(datasets[idx])

adata_all = tl.davae_preprocessing(datasets, n_top_genes=8000)

# Training options are collected in one place so they are easy to tweak.
fit_kwargs = dict(
    epochs=30,
    hidden_layers=[128, 64, 32, 5],
    domain_lambda=3.0,
)
adata_intagrate = davae.fit_integration(adata_all, **fit_kwargs)
print(adata_intagrate)

# Neighbour graph on the DAVAE embedding -> Louvain clusters -> UMAP plot.
sc.pp.neighbors(adata_intagrate, use_rep='X_davae', n_neighbors=15)
sc.tl.louvain(adata_intagrate)
sc.tl.umap(adata_intagrate)
sc.pl.umap(adata_intagrate, color='louvain', cmap='tab20c')
# Disabled: manual highly-variable-gene filtering, re-export of the inputs
# and cleanup of the HVG annotation columns (kept for reference).
# sc.pp.highly_variable_genes(adata2, n_top_genes=6000)
# adata1 = adata1[:, adata1.var.highly_variable]
# adata2 = adata2[:, adata2.var.highly_variable]
#
# adata1.write_h5ad(file1_p)
# adata2.write_h5ad(file2_p)
# del adata1.var['highly_variable']
# del adata2.var['highly_variable']
# del adata1.var['means']
# del adata2.var['means']
# del adata1.var['dispersions']
# del adata2.var['dispersions']
# del adata1.var['dispersions_norm']
# del adata2.var['dispersions_norm']

print(adata1)
print(adata2)

# Preprocess both inputs together, then fit the DAVAE integration model.
adata_all = tl.davae_preprocessing([adata1, adata2], n_top_genes=4000)
fit_kwargs = dict(
    epochs=25,
    hidden_layers=[128, 64, 32, 5],
    sparse=True,
    domain_lambda=0.5,
)
adata_integrate = davae.fit_integration(adata_all, **fit_kwargs)

# Disabled: computing the UMAP directly with umap-learn instead of scanpy.
# import umap
# adata_integrate.obsm['X_umap']=umap.UMAP().fit_transform(adata_integrate.obsm['X_davae'])

# Visualise the embedding coloured by `_batch`, then persist the result.
sc.pp.neighbors(adata_integrate, use_rep='X_davae', n_neighbors=8)
sc.tl.umap(adata_integrate)
sc.pl.umap(adata_integrate, color=['_batch'], s=3)
adata_integrate.write_h5ad(base_path + out_path)
# DAVAE integration of the two "dc" benchmark batches, plotted by batch and
# by the labels taken from the original (un-split) dataset.
matplotlib.use('TkAgg')

# NOTE(review): `epochs` is not referenced by the visible fit_integration
# call below, which passes epochs=1000 literally - confirm which is intended.
epochs = 40
base_path = '/Users/zhongyuanke/data/'
benchmark_dir = base_path + 'dann_vae/benchmark1/'
file1 = benchmark_dir + 'dc_batch1.h5ad'
file2 = benchmark_dir + 'dc_batch2.h5ad'
orig_path = benchmark_dir + 'orig.h5ad'

# -------------train---------------------
adata1, adata2, adata_orig = (
    tl.read_sc_data(path, fmt='h5ad') for path in (file1, file2, orig_path)
)
# orig_label =adata_orig.obs['label']
print(adata1)
print(adata2)

adata_all = tl.davae_preprocessing(
    [adata1, adata2],
    n_top_genes=4000,
    sparse=False,
)
adata_integrate = davae.fit_integration(
    adata_all,
    split_by='batch',
    epochs=1000,
    hidden_layers=[128, 64, 32, 2],
    sparse=False,
    domain_lambda=6,
)

# Carry the labels of the original dataset over to the integrated object.
adata_integrate.obs['label'] = adata_orig.obs['label']

sc.pp.neighbors(adata_integrate, use_rep='X_davae')
sc.tl.umap(adata_integrate)
sc.pl.umap(adata_integrate, color=['batch', 'label'], s=10, cmap='Dark2')
# print(adata_integrate)
# adata_integrate.write_h5ad(base_path+'dann_vae/benchmark1/dc_davae_temp.h5ad')
# scGen batch removal on the multimodal (ATAC + RNA) pair; the corrected
# object is plotted and written to disk.
base_path = '/Users/zhongyuanke/data/'
file_rna = '/Users/zhongyuanke/data/dann_vae/multimodal/rna.h5ad'
file_atac = '/Users/zhongyuanke/data/dann_vae/multimodal/atac.h5ad'
seurat_celltype_path = base_path + 'multimodal/atac_pbmc_10k/celltype_filt.csv'
# NOTE(review): not referenced by the visible train() call, which passes
# batch_size=32 literally.
batch_size = 128

# ATAC is loaded first, RNA second.
adata1, adata2 = sc.read_h5ad(file_atac), sc.read_h5ad(file_rna)
print(adata1)
print(adata2)
# adata_b1.obs_names_make_unique()
# adata_b2.obs_names_make_unique()
# adata_b3.obs_names_make_unique()

adata_all = tl.davae_preprocessing(
    [adata1, adata2],
    n_top_genes=2000,
    hvg=False,
    lognorm=False,
)
adata_all.obs_names_make_unique()
adata_all = scgen.setup_anndata(adata_all, batch_key="batch_label", copy=True)

model = scgen.SCGEN(adata_all)
# NOTE(review): patience (25) is larger than max_epochs (15) - confirm that
# early stopping is meant to be effectively inactive here.
train_kwargs = dict(
    max_epochs=15,
    batch_size=32,
    early_stopping=True,
    early_stopping_patience=25,
    use_gpu=False,
)
model.train(**train_kwargs)
corrected_adata = model.batch_removal()

sc.pp.neighbors(corrected_adata, use_rep='corrected_latent')
sc.tl.umap(corrected_adata)
sc.pl.umap(corrected_adata, color='batch')
corrected_adata.write('/Users/zhongyuanke/data/scgen/scgen_multimodal.h5ad')
# scGen batch removal on the mixed-cell-lines data (293t / jurkat / mixed),
# followed by a UMAP coloured by cell type.
import scbean.tools.utils as tl
import matplotlib
from numpy.random import seed

seed(2021)  # fix NumPy's global RNG seed
matplotlib.use('TkAgg')

base_path = "/Users/zhongyuanke/data/vipcca/mixed_cell_lines/"
adata_b1, adata_b2, adata_b3 = (
    sc.read_h5ad(base_path + fname)
    for fname in ("293t.h5ad", "jurkat.h5ad", "mixed.h5ad")
)
# adata_b1.obs_names_make_unique()
# adata_b2.obs_names_make_unique()
# adata_b3.obs_names_make_unique()

adata_all = tl.davae_preprocessing(
    [adata_b1, adata_b2, adata_b3],
    n_top_genes=3000,
)
adata_all.obs_names_make_unique()
adata_all = scgen.setup_anndata(adata_all, batch_key="batch_label", copy=True)

model = scgen.SCGEN(adata_all)
train_kwargs = dict(
    max_epochs=25,
    batch_size=32,
    early_stopping=True,
    early_stopping_patience=25,
    use_gpu=False,
)
model.train(**train_kwargs)
corrected_adata = model.batch_removal()

sc.pp.neighbors(corrected_adata, use_rep='corrected_latent')
sc.tl.umap(corrected_adata)
sc.pl.umap(corrected_adata, color='celltype')
# corrected_adata.write('/Users/zhongyuanke/data/scgen/scgen_mcl01.h5ad')
type=str, default='/Users/zhongyuanke/data/', help="base path") parser.add_argument("--epoch", type=int, default=10, help="epoch of training") opt = parser.parse_args() base_path = opt.base_path out_path = 'dann_vae/hca/davae_01.h5ad' file1 = base_path + 'HCA/ica_cord_blood_h5.h5' file2 = base_path + 'HCA/ica_bone_marrow_h5.h5' adata1 = tl.read_sc_data(file1, fmt='10x_h5') adata2 = tl.read_sc_data(file2, fmt='10x_h5') adata1.var_names_make_unique() adata2.var_names_make_unique() print(adata1) adata_all = tl.davae_preprocessing([adata1, adata2], hvg=False, lognorm=False) adata_integrate = davae.fit_integration( adata_all, split_by='batch', domain_lambda=5, epochs=1, hidden_layers=[128, 64, 32, 5], sparse=True, ) sc.pp.neighbors(adata_integrate, use_rep='X_davae') sc.tl.umap(adata_integrate) sc.pl.umap(adata_integrate, color='batch') print(adata_integrate)
# Disabled: earlier hand-rolled DACVAE training path (kept for reference).
# loss_weight = np.array(loss_weight)
# print(adata1.shape)
# print(adata2.shape)
# print(orig_data.shape)
# data, batches, batch_label, loss_weight = shuffle(orig_data, orig_batches, orig_batch_label, loss_weight,
#                                                   random_state=0)
# net_x = DACVAE(input_size=data.shape[1], batches=2, latent_size=10)
# net_x.build()
# net_x.compile()
# his = net_x.train(data, batches, loss_weight, epochs=epochs, batch_size=batch_size)
#
# mid = net_x.embedding(orig_data, orig_batches)
# adata_mid = anndata.AnnData(X=mid)
# adata_mid.obs['batch'] = orig_batch_label
# adata.obsm['davae'] = mid

# Current path: library preprocessing plus fit_integration in DACVAE mode.
adata_all = tl.davae_preprocessing(adata_list, sparse=False)
fit_kwargs = dict(
    split_by='batch',
    mode='DACVAE',
    domain_lambda=5.0,
    epochs=3,
    hidden_layers=[128, 64, 32, 5],
    sparse=False,
)
adata_out = davae.fit_integration(adata_all, **fit_kwargs)

# UMAP of the learned embedding, coloured by cell type.
sc.pp.neighbors(adata_out, use_rep='X_davae')
sc.tl.umap(adata_out)
sc.pl.umap(adata_out, color='celltype')
print(adata_out)
# adata.write_h5ad(base_path+out_path)