示例#1
0
import numpy as np
from scbean.tools import utils as tl
import scanpy as sc
import pandas as pd
import scanorama
import argparse

# Scanorama integration of two h5ad datasets: correct the batches, embed the
# merged result with UMAP, and store it on disk.

# NOTE(review): base_path is assigned but never joined onto the relative
# paths below -- confirm whether the inputs/outputs are meant to live under it.
base_path = '/Users/zhongyuanke/data/'

file1 = 'dropviz/mouse_brain_dropviz_filtered.h5ad'
file2 = 'nuclei/adata_nuclei_filtered.h5ad'
scan_path = 'results/scan_mouse.h5ad'

# Load both inputs (in order) and echo a summary of each one.
datas = [tl.read_sc_data(path, fmt='h5ad') for path in (file1, file2)]
adata1, adata2 = datas
for loaded in datas:
    print(loaded)

# Batch-correct with Scanorama; return_dimred=True asks for the reduced
# representation as well, which is read back as 'X_scanorama' further down.
corrected = scanorama.correct_scanpy(datas, return_dimred=True, dimred=16)
first_batch, second_batch = corrected[0], corrected[1]
adata_corrected = first_batch.concatenate(second_batch)
print(adata_corrected)

# Neighbor graph on the Scanorama embedding, then UMAP, then save to scan_path.
sc.pp.neighbors(adata_corrected, use_rep='X_scanorama')
sc.tl.umap(adata_corrected)
adata_corrected.write_h5ad(scan_path)
示例#2
0
import scbean.model.davae as davae
import scbean.tools.utils as tl
import scanpy as sc
import matplotlib
from numpy.random import seed
# DAVAE integration of the ifnb dataset: split by stimulation condition,
# integrate, cluster with Louvain and plot the clusters on a UMAP.

# Seed NumPy's global random generator and select the Tk plotting backend
# before any figure is created.
seed(2021)
matplotlib.use('TkAgg')

# Read the combined object and split it into one dataset per "stim" value.
adata = tl.read_sc_data('/Users/zhongyuanke/data/seurat_data/ifnb/ifnb.h5ad')
datasets = tl.split_object(adata, by="stim")
print(datasets[0])
print(datasets[1])

# Preprocess the split datasets together, then fit the integration model.
adata_all = tl.davae_preprocessing(datasets, n_top_genes=8000)
adata_integrate = davae.fit_integration(
    adata_all,
    epochs=30,
    hidden_layers=[128, 64, 32, 5],
    domain_lambda=3.0,
)
print(adata_integrate)

# Neighbor graph, clustering and UMAP are all computed on the 'X_davae'
# representation written by the integration step.
sc.pp.neighbors(adata_integrate, use_rep='X_davae', n_neighbors=15)
sc.tl.louvain(adata_integrate)
sc.tl.umap(adata_integrate)

# 'louvain' is a categorical annotation: scanpy colours categories from
# `palette`, whereas `cmap` only applies to continuous values, so the
# tab20c colours have to be requested through `palette` to take effect.
sc.pl.umap(adata_integrate, color='louvain', palette='tab20c')
示例#3
0
import matplotlib
from numpy.random import seed
# Load the anterior and posterior 10x mouse-brain sections as two batches.

# Seed NumPy's global random generator and select the Tk plotting backend.
seed(2021)
matplotlib.use('TkAgg')

# Run settings (not consumed within this part of the script).
batch_size = 256
epochs = 25

# Input / output locations.
base_path = '/Users/zhongyuanke/data/'
out_path = 'dann_vae/spatial/davae_save02.h5ad'
file1 = (
    base_path
    + 'spatial/mouse_brain/10x_mouse_brain_Anterior/V1_Mouse_Brain_Sagittal_Anterior_filtered_feature_bc_matrix.h5'
)
file2 = (
    base_path
    + 'spatial/mouse_brain/10x_mouse_brain_Posterior/V1_Mouse_Brain_Sagittal_Posterior_filtered_feature_bc_matrix.h5'
)
file1_p = base_path + 'spatial/10x_mouse_brain_Anterior/anterior.h5ad'
file2_p = base_path + 'spatial/10x_mouse_brain_Posterior/posterior.h5ad'

# Read each 10x h5 matrix and tag it with its section name as batch_name.
adata1, adata2 = (
    tl.read_sc_data(path, fmt='10x_h5', batch_name=section)
    for path, section in ((file1, 'Anterior'), (file2, 'Posterior'))
)
print(adata1)

# Make variable names unique in both sections.
for section_adata in (adata1, adata2):
    section_adata.var_names_make_unique()
# len1 = adata1.shape[0]
# len2 = adata2.shape[0]

# sc.pp.filter_genes(adata1, min_cells=30)
# sc.pp.filter_genes(adata2, min_cells=30)
# sc.pp.log1p(adata1)
# sc.pp.log1p(adata2)
# print(adata1)
# print(adata2)
#
示例#4
0
from scbean.model import davae as davae
import anndata
import scanpy as sc
import pandas as pd
import matplotlib

# DAVAE integration of the two benchmark1 batches, followed by copying the
# labels of the original object onto the integrated result.
matplotlib.use('TkAgg')

# NOTE(review): `epochs` is not passed to fit_integration below, which uses a
# literal epochs=1000 -- confirm which value is intended.
epochs = 40
base_path = '/Users/zhongyuanke/data/'

file1 = base_path + 'dann_vae/benchmark1/dc_batch1.h5ad'
file2 = base_path + 'dann_vae/benchmark1/dc_batch2.h5ad'
orig_path = base_path + 'dann_vae/benchmark1/orig.h5ad'

# Read the two batches and the original object, in that order.
adata1, adata2, adata_orig = (
    tl.read_sc_data(path, fmt='h5ad')
    for path in (file1, file2, orig_path)
)
print(adata1)
print(adata2)

# Joint preprocessing of both batches.
preprocess_options = dict(n_top_genes=4000, sparse=False)
adata_all = tl.davae_preprocessing([adata1, adata2], **preprocess_options)

# Fit the integration model; the last hidden layer has size 2.
fit_options = dict(
    split_by='batch',
    epochs=1000,
    hidden_layers=[128, 64, 32, 2],
    sparse=False,
    domain_lambda=6,
)
adata_integrate = davae.fit_integration(adata_all, **fit_options)

# NOTE(review): this assigns a Series taken from adata_orig.obs; presumably the
# assignment is aligned on the obs index -- confirm both objects share obs names.
adata_integrate.obs['label'] = adata_orig.obs['label']
示例#5
0
import scbean.model.vipcca as vip
import scbean.tools.utils as tl
import scbean.tools.plotting as pl

# Please choose an appropriate matplotlib backend.
import matplotlib
# matplotlib.use('TkAgg')

# Read the three single-cell batches, tagging each with its batch name.
batch_files = (
    ("./data/mixed_cell_lines/293t.h5ad", "293t"),
    ("./data/mixed_cell_lines/jurkat.h5ad", "jurkat"),
    ("./data/mixed_cell_lines/mixed.h5ad", "mixed"),
)
adata_b1, adata_b2, adata_b3 = (
    tl.read_sc_data(path, batch_name=label) for path, label in batch_files
)

# tl.preprocessing includes filtering, log-TPM normalization and selection of
# highly variable genes.
adata_all = tl.preprocessing([adata_b1, adata_b2, adata_b3])

# Directory used both for the model results and for the embedding output.
res_path = './results/CVAE_5/'

# Construct VIPCCA with the specific settings for this run.
handle = vip.VIPCCA(
    adata_all,
    res_path=res_path,
    split_by="_batch",
    epochs=100,
    lambda_regulizer=5,
)

# Train and integrate the datasets. VIPCCA's output includes the cell
# representation in reduced dimensional space and recovered gene expression.
adata_integrate = handle.fit_integrate()

# Visualization: compute the UMAP embedding, then plot it grouped by batch.
pl.run_embedding(adata_integrate, path=res_path, method="umap")
pl.plotEmbedding(
    adata_integrate,
    path=res_path,
    method='umap',
    group_by="_batch",
    legend_loc="right margin",
)
示例#6
0
# DAVAE integration of the HCA cord-blood and bone-marrow 10x datasets.
matplotlib.use('TkAgg')

# Command-line options.
parser = argparse.ArgumentParser()
parser.add_argument("--base_path",
                    type=str,
                    default='/Users/zhongyuanke/data/',
                    help="base path")
# --epoch used to be parsed but ignored (training was hard-coded to one
# epoch). It is now forwarded to fit_integration; the default is 1 so that a
# run without the flag trains exactly as before.
parser.add_argument("--epoch", type=int, default=1, help="epoch of training")

opt = parser.parse_args()

base_path = opt.base_path
out_path = 'dann_vae/hca/davae_01.h5ad'
file1 = base_path + 'HCA/ica_cord_blood_h5.h5'
file2 = base_path + 'HCA/ica_bone_marrow_h5.h5'

# Read both 10x h5 matrices and make their variable names unique.
adata1 = tl.read_sc_data(file1, fmt='10x_h5')
adata2 = tl.read_sc_data(file2, fmt='10x_h5')
adata1.var_names_make_unique()
adata2.var_names_make_unique()
print(adata1)

# Preprocess with highly-variable-gene selection and log-normalisation
# switched off (hvg=False, lognorm=False).
adata_all = tl.davae_preprocessing([adata1, adata2], hvg=False, lognorm=False)

# Fit the integration model for the number of epochs requested on the CLI.
adata_integrate = davae.fit_integration(
    adata_all,
    split_by='batch',
    domain_lambda=5,
    epochs=opt.epoch,
    hidden_layers=[128, 64, 32, 5],
    sparse=True,
)

# Neighbor graph on the 'X_davae' representation.
sc.pp.neighbors(adata_integrate, use_rep='X_davae')
示例#7
0
import scbean.model.davae as davae
import scbean.tools.utils as tl
import scanpy as sc
import matplotlib
from numpy.random import seed
# Merge the three mixed-cell-line batches into one file on disk, then
# integrate the same batches with DAVAE.

# Seed NumPy's global random generator and select the Tk plotting backend.
seed(2021)
matplotlib.use('TkAgg')

base_path = "/Users/zhongyuanke/data/vipcca/mixed_cell_lines/"

# Read each batch, tagging it with its batch name.
adata_b1, adata_b2, adata_b3 = (
    tl.read_sc_data(base_path + file_name, batch_name=label)
    for file_name, label in (
        ("293t.h5ad", "293t"),
        ("jurkat.h5ad", "jurkat"),
        ("mixed.h5ad", "mixed"),
    )
)

# Concatenate pairwise (b1 + b2, then + b3) and save the merged object.
adata = adata_b1.concatenate(adata_b2).concatenate(adata_b3)
adata.write_h5ad('/Users/zhongyuanke/data/pbmc/zheng/mcl.h5ad')

# Joint preprocessing of the three batches.
batches = [adata_b1, adata_b2, adata_b3]
adata_all = tl.davae_preprocessing(batches, n_top_genes=3000)
# The summary is printed twice, as in the original script.
print(adata_all)
print(adata_all)

# Fit the integration model over the three batches.
fit_options = dict(
    batch_num=3,
    split_by='batch_label',
    domain_lambda=3.0,
    epochs=25,
    sparse=True,
    hidden_layers=[128, 64, 32, 5],
)
adata_integrate = davae.fit_integration(adata_all, **fit_options)
# sc.pp.neighbors(adata_integrate, use_rep='X_davae', n_neighbors=10)
# sc.tl.umap(adata_integrate)
import umap
示例#8
0
import scbean.model.davae as davae
import scbean.tools.utils as tl
import scanpy as sc
import matplotlib
from numpy.random import seed
import umap

# DAVAE integration of the mixed-cell-line batches, embedded with umap-learn
# and plotted by batch and cell type.

# Seed NumPy's global random generator and select the Tk plotting backend.
seed(2021)
matplotlib.use('TkAgg')

r1 = "./data/mixed_cell_lines/mixed.h5ad"
r2 = "./data/mixed_cell_lines/293t.h5ad"
r3 = "./data/mixed_cell_lines/jurkat.h5ad"

# Read each batch, tagging it with its batch name.
adata_b1, adata_b2, adata_b3 = (
    tl.read_sc_data(path, batch_name=label)
    for path, label in ((r1, 'mix'), (r2, '293t'), (r3, 'jurkat'))
)

# Joint preprocessing of the three batches.
batches = [adata_b1, adata_b2, adata_b3]
adata_all = tl.davae_preprocessing(batches, n_top_genes=2000)

# Fit the integration model over the three batches.
fit_options = dict(
    batch_num=3,
    domain_lambda=3.0,
    epochs=25,
    sparse=True,
    hidden_layers=[64, 32, 6],
)
adata_integrate = davae.fit_integration(adata_all, **fit_options)

# Run umap-learn on the 'X_davae' representation and store the result under
# 'X_umap', the key that sc.pl.umap reads for plotting.
davae_embedding = adata_integrate.obsm['X_davae']
adata_integrate.obsm['X_umap'] = umap.UMAP().fit_transform(davae_embedding)
sc.pl.umap(adata_integrate, color=['_batch', 'celltype'], s=3)