Пример #1
0
def load_data(filename, batch_size):
    """Load an h5ad dataset and return it with its rows in random order.

    Args:
        filename: Path handed to ``prep.read_10x_data`` with
            ``format_type="10x_h5ad"``.
        batch_size: Number of rows per batch; used only to derive
            ``n_batches``.

    Returns:
        A tuple ``(adata, n_batches, input_size)`` where ``adata`` is the
        loaded object indexed by a random permutation of ``adata.obs.index``,
        ``n_batches`` is ``shape[0] / batch_size`` truncated to an int, and
        ``input_size`` is ``shape[1]``.
    """
    # The project helpers live outside the regular import path, so the
    # library directory is pushed to the front of sys.path before importing.
    import sys
    sys.path.insert(0, './lib/pylib/')
    import preprocess_10x as prep

    adata = prep.read_10x_data(filename, format_type="10x_h5ad")

    n_rows, n_cols = adata.shape
    n_batches = int(n_rows / batch_size)  # a trailing partial batch is dropped
    input_size = int(n_cols)

    # Shuffle by permuting the observation labels and indexing with them.
    shuffled_index = np.random.permutation(adata.obs.index)
    return adata[shuffled_index, :], n_batches, input_size
Пример #2
0
def load_data(filename, batch_size, ref=True):
    """Load an h5ad dataset, keep one replicate group, normalize and shuffle.

    Relies on the module-level names ``prep``, ``np`` and ``path`` being
    defined by the surrounding file.

    Args:
        filename: Path handed to ``prep.read_10x_data`` with
            ``format_type="10x_h5ad"``.
        batch_size: Number of rows per batch; used only to derive
            ``n_batches``.
        ref: When true, keep rows whose ``replicate_id`` category code is
            below 4; otherwise keep rows whose code is 4 or above. The same
            flag is forwarded as ``filter_disp_genes`` to
            ``prep.normalization``.

    Returns:
        A tuple ``(adata, n_batches, input_size)`` where ``adata`` is the
        normalized object indexed by a random permutation of its
        ``obs.index``, ``n_batches`` is ``shape[0] / batch_size`` truncated
        to an int, and ``input_size`` is ``shape[1]``.
    """
    adata = prep.read_10x_data(filename, format_type="10x_h5ad")

    # Store the integer code of each cell type as its own categorical column.
    type_codes = adata.obs['cell_type'].cat.codes
    adata.obs['cell_type_code'] = type_codes.astype("category")

    # Split the data on the replicate code: codes 0-3 vs. codes >= 4.
    replicate_codes = adata.obs.replicate_id.cat.codes
    if ref:
        keep = replicate_codes < 4
    else:
        keep = replicate_codes >= 4
    adata = adata[keep, :]

    adata = prep.normalization(
        adata, path=path, hg=False, filter_disp_genes=ref)

    n_rows, n_cols = adata.shape
    n_batches = int(n_rows / batch_size)  # a trailing partial batch is dropped
    input_size = int(n_cols)

    # Shuffle by permuting the observation labels and indexing with them.
    shuffled_index = np.random.permutation(adata.obs.index)
    return adata[shuffled_index, :], n_batches, input_size
Пример #3
0
# Plot t-SNE embeddings coloured by marker-gene panels for a clustered
# dataset, then draw a QC violin plot.
#
# Usage: <script> PATH
#   PATH is used as a plain string prefix for both the input h5ad file and
#   the output PNG files, so it must end with a path separator to address
#   files inside a directory.
import sys  # needed for sys.path / sys.argv below

sys.path.insert(0, './lib/pylib/')
import preprocess_10x as prep
import zinb_AE as zae
import scanpy as sc
import matplotlib.pyplot as pl
import seaborn as sns
import pandas as pd
import os

sns.set(style='white', rc={'figure.figsize': (8, 6), 'figure.dpi': 150})

# Read the annotated data written after the louvain and leiden clustering step.
method = "t_test"
path = sys.argv[1]
cluster_type = "louvain"
h5adfile_after_cluster = path + "ica_markers_" + method + ".h5ad"
adata = prep.read_10x_data(h5adfile_after_cluster, "10x_h5ad")

# One figure per panel; each panel is drawn as a grid with three columns and
# saved as ica_tsne_marker<N>.png, N counting from 1 in the order given here.
marker_panels = [
    ['LYZ', 'CST3', 'CD14', 'MS4A7', 'FCGR3A', 'FCER1A'],
    ['GNLY', 'NKG7', 'KLRB1', 'IL7R', 'CD8B', 'CD27'],
    ['CD79A', 'MME', 'MS4A1', 'SEPP1', 'SDC1', 'MZB1'],
    ['PPBP', 'IL3RA', 'HBB', 'CD34', 'PF4', 'RPL34'],
]
for panel_number, panel_genes in enumerate(marker_panels, start=1):
    sc.pl.tsne(adata, color=panel_genes, show=False, ncols=3)
    pl.savefig(path + "ica_tsne_marker" + str(panel_number) + ".png")
    # Close the figure so the next panel starts from a clean canvas.
    pl.close()

# NOTE(review): this figure is created with show=False and is neither saved
# nor closed in the visible code -- confirm whether a savefig call follows.
sc.pl.violin(adata, ['n_genes', 'n_counts', 'percent_mito'],
             jitter=0.4, multi_panel=True, show=False)
Пример #4
0
import sys
sys.path.insert(0, './lib/pylib/')
import preprocess_10x as prep

# Usage: <script> COUNTS_H5 CLUSTERED_H5AD OUTPUT_PATH
counts_file = sys.argv[1]
clustered_file = sys.argv[2]
output_path = sys.argv[3]

# Load the 10x count matrix and the already-clustered dataset.
adata = prep.read_10x_data(counts_file, "10x_h5")
adata_cluster = prep.read_10x_data(clustered_file, "10x_h5ad")

# Restrict the count matrix to the observations present in the clustered data.
clustered_cells = adata_cluster.obs_names
adata = adata[clustered_cells, :]

# Carry the louvain labels and the unstructured / multi-dimensional
# annotations over from the clustered object.
adata.obs['louvain'] = adata_cluster.obs['louvain']
adata.uns = adata_cluster.uns
adata.obsm = adata_cluster.obsm

prep.write2mtx(adata, path=output_path)
Пример #5
0
import sys
sys.path.insert(0, './lib/pylib/')
import preprocess_10x as prep
import zinb_AE as zae
# The model itself is trained by ./test/train_model_test.py; this script only
# runs the prediction step on a reference panel stored as h5ad.
#
# Usage: <script> IFILE IFILE_RAW OUTPUTPATH
# Example arguments:
#   IFILE      ./data/hca/ica_bm_after_filtered_recipe_zheng_qc1.h5ad
#   IFILE_RAW  ./data/hca/ica_bm_after_filtered_raw_recipe_zheng_qc1.h5ad
#   OUTPUTPATH ./result/ica_bm_qc1/
ifile = sys.argv[1]
ifile_raw = sys.argv[2]
outputpath = sys.argv[3]

# NOTE(review): the third positional argument 'r' presumably selects a
# read-only mode in prep.read_10x_data -- confirm against that helper.
adata = prep.read_10x_data(ifile, "10x_h5ad", 'r')

# Attach an independent copy of the raw dataset as the .raw attribute.
adata.raw = prep.read_10x_data(ifile_raw, "10x_h5ad").copy()

# Predict the latent layer.
zae.prediction(adata, outputpath)
Пример #6
0
import sys, os
sys.path.insert(0, './lib/pylib/')
import preprocess_10x as prep
import numpy as np
from anndata import AnnData

# Merge the per-cell-type 10x matrices under ./data/zheng/ into one dense
# AnnData object and write it out as a gzip-compressed h5ad file.
import pandas as pd  # used for pd.concat; DataFrame.append is gone in pandas 2

# Every entry of the data directory is treated as a dataset, except the ones
# removed here. list.remove raises ValueError if an entry is missing.
datapath = os.listdir("./data/zheng/")
for excluded in ("293t_filtered_gene_bc_matrices_mex",
                 "frozen_pbmc_donor_b",
                 "frozen_pbmc_donor_c",
                 "fresh_68k_pbmc_donor_a"):
    datapath.remove(excluded)

# Collect the pieces first and combine them once after the loop; stacking
# inside the loop would re-copy the growing matrix on every iteration.
dense_blocks = []
obs_frames = []
var = None
for i, dataset_name in enumerate(datapath):
    mtx_dir = "./data/zheng/" + dataset_name + "/hg19/"
    adata = prep.read_10x_data(mtx_dir, "10x_mtx")
    # Label every observation with the dataset (directory) it came from.
    adata.obs['cell_type'] = dataset_name
    if i == 0:
        # The variable annotation of the first dataset is used for the
        # merged object; the other datasets are assumed to share it.
        var = adata.var
    dense_blocks.append(adata.X.toarray())
    obs_frames.append(adata.obs)

X = np.vstack(dense_blocks)
obs = pd.concat(obs_frames)

adata = AnnData(X, obs=obs, var=var, dtype=X.dtype.name, filemode=True)

adata.write("./data/zheng/ten_mixed_cell_types.h5ad", compression="gzip")
Пример #7
0
# Run the latent-layer prediction on the reference panel and plot the
# clusters of the resulting projection.
import sys  # needed for sys.path below

sys.path.insert(0, './lib/pylib/')
import preprocess_10x as prep
import zinb_AE as zae
import scanpy as sc
import scanpy.api
import matplotlib.pyplot as pl
import seaborn as sns
import random

sns.set(style='white', rc={'figure.figsize': (8, 6), 'figure.dpi': 150})

# The model itself is trained by ./test/train_model_test.py.

# ---- Read the reference panel (h5ad) ----
ifile = './data/hca/ica_bm_after_filtered_recipe_zheng_qc1.h5ad'
ifile_raw = './data/hca/ica_bm_after_filtered_raw_recipe_zheng_qc1.h5ad'
outputpath = "./result/ica_bm_qc1/"
# NOTE(review): the third positional argument 'r' presumably selects a
# read-only mode in prep.read_10x_data -- confirm against that helper.
adata = prep.read_10x_data(ifile, "10x_h5ad", 'r')
adataraw = prep.read_10x_data(ifile_raw, "10x_h5ad")
# Attach an independent copy of the raw dataset as the .raw attribute.
adata.raw = adataraw.copy()

# ---- Predict the latent layer ----
zae.prediction(adata, outputpath)

# ---- Read the projection and plot clusters ----
# filepath is used as a plain string prefix, hence the trailing slash.
filepath = "result/ica_bm_qc1/"
adata = prep.read_10x_data(filepath + "projection.csv", "10x_csv")
# Alternative input: filepath + "ica_clusters.h5ad" read as "10x_h5ad".
zae.plotCluster(adata, filepath=filepath)
############## Plot Clusters ##############################
# ifile="result/zheng/frozen_pbmc_donor_b/projection.csv"
# h5adfile="./result/zheng/frozen_pbmc_donor_b/frozen_pbmc_donor_b_clusters.h5ad"
# o1="result/figures/plot_umap_frozen_pbmc_donor_b.pdf"
# o2="result/figures/plot_tsne_frozen_pbmc_donor_b.pdf"
# adata = prep.read_10x_data(ifile,"10x_csv")
Пример #8
0
import sys
sys.path.insert(0, './lib/pylib/')
import preprocess_10x as prep
import zinb_AE as zae
import scanpy as sc
import scanpy.api
import matplotlib.pyplot as pl
import seaborn as sns
import random
# Global plotting defaults: white style, 8x6 figures at 150 dpi.
sns.set(style='white', rc={'figure.figsize': (8, 6), 'figure.dpi': 150})

# Usage: <script> FILEPATH
#   FILEPATH is used as a plain string prefix, so it must end with a path
#   separator for "projection.csv" to be looked up inside that directory.
filepath = sys.argv[1]

# Read the projection and plot its clusters.
projection_csv = filepath + "projection.csv"
adata = prep.read_10x_data(projection_csv, "10x_csv")
zae.plotCluster(adata, filepath=filepath, dm_reduction=True)