示例#1
0
def load_samples(sample_ids):
    """
    Load Velocyto output loom files and Seurat data.

    Parameters
    ----------
    sample_ids : list
        list of sample IDs.

    Returns
    -------
    samples : dict
        dictionary of dictionaries, {s : dict} for s in sample_ids
            Each subdictionary contains:
                AnnData object (key: 'main') loaded from loom file
                AnnData object (key: from filename) for csv files with seurat data
    """
    samples = {}
    for s in sample_ids:
        samples[s] = {}

        filename = "./inputs/" + s + ".loom"
        # Announce progress: loading loom files can take a while.
        print("\nLoading sample " + s + " from " + filename)
        samples[s]['main'] = scv.read(filename, cache=False)

        # Seurat exports are named "<sample>_<varname>.csv".  Use the boolean
        # `and` (the original used bitwise `&`) and a list comprehension
        # instead of filter(lambda ...).
        csv_files = [f for f in os.listdir("./outputs/seurat_dat/")
                     if f.startswith(s) and f.endswith(".csv")]
        for csvf in csv_files:
            # Drop the "<sample>_" prefix and the ".csv" suffix to build the key.
            varname = os.path.splitext(csvf)[0].casefold()[len(s) + 1:]
            samples[s][varname] = scv.read("./outputs/seurat_dat/" + csvf,
                                           cache=False)
    return samples
示例#2
0
def read_adata(train_paths, train_datasets, test_path, test_dataset):
    """Read AnnData files of training and testing datasets.

    Reads a list of training datasets (at least one) and one testing
    dataset from .h5ad files.

    Parameters
    ----------
    train_paths : list
        list of paths where training datasets are located
    train_datasets : list
        a list of names of training datasets (same order as paths)
    test_path : str
        path of test dataset
    test_dataset : str
        name of test dataset

    Returns
    -------
    list
        A list of training dataset anndata objects

    AnnData
        An anndata object containing testing dataset
    """
    # Pair each path with its dataset name directly instead of indexing
    # both lists by position (range(len(...)) anti-pattern).
    adata_trains = [scv.read(os.path.join(path, name))
                    for path, name in zip(train_paths, train_datasets)]
    adata_pred = scv.read(os.path.join(test_path, test_dataset))
    return adata_trains, adata_pred
def scv_open(file_name):
    """
    Open a file for scVelo RNA velocity analysis.

    See https://scvelo.readthedocs.io/VelocityBasics.html
    for the file format and requirement.

    :param file_name: path to an input file readable by ``scv.read``
    :return: AnnData object with duplicate variable names made unique
    """
    data = scv.read(file_name, cache=True)
    data.var_names_make_unique()
    return data
示例#4
0
def subset_anndata(loom, csvfile):
    """Load a loom file and keep only the cells listed in a CSV file.

    Cell barcodes are extracted from the loom observation names with a
    regex and matched against the CSV index.
    """
    data = scv.read(loom)
    data.var_names_make_unique()
    # Reduce each observation name to its barcode (a run of >= 6 ACTG bases).
    barcodes = []
    for name in data.obs.index:
        barcodes.append(re.search("[ACTG]{6,}", name).group(0))
    data.obs.index = barcodes
    reference = pd.read_csv(csvfile + ".csv", index_col=0, encoding="utf8")
    shared = data.obs.index.intersection(reference.index)
    data = data[shared, :]
    return data
示例#5
0
def read_raw(train_paths, train_datasets, test_path, test_dataset):
    """Read from adata.raw and revert log1p normalization.

    Reads a list of training datasets (at least one) and one testing
    dataset and reverses log1p normalization on the raw set that
    includes all genes and has not been regressed out.

    Parameters
    ----------
    train_paths : list
        list of paths where training datasets are located
    train_datasets : list
        a list of names of training datasets (same order as paths)
    test_path : str
        path of test dataset
    test_dataset : str
        name of test dataset

    Returns
    -------
    list
        A list of training dataset anndata objects

    AnnData
        An anndata object containing testing dataset
    """
    adata_trains = []
    for path, name in zip(train_paths, train_datasets):
        loaded = scv.read(os.path.join(path, name))
        # .raw keeps all genes before filtering/regression; expm1 undoes log1p.
        adata_trains.append(sc.AnnData(X=np.expm1(loaded.raw.X),
                                       obs=loaded.obs,
                                       var=loaded.raw.var))
    adata_pred = scv.read(os.path.join(test_path, test_dataset))
    # BUG FIX: the original used `adata_pred[i].raw.var`, indexing with the
    # stale loop variable `i`; the test dataset's own raw var table is wanted.
    adata_pred = sc.AnnData(X=np.expm1(adata_pred.raw.X),
                            obs=adata_pred.obs,
                            var=adata_pred.raw.var)

    return adata_trains, adata_pred
示例#6
0
def load_pca_data(dictionary):
    """
    Loads PCA loadings and variance data output by extractSeurat.R

    Parameters
    ----------
    dictionary : dict
        dictionary to which the PCA data is added

    Returns
    -------
    dictionary : dict
        dictionary, with keys added for PCA loadings and variance
    """
    try:
        print("\nImporting PCA loadings and variance data")
        dictionary['pca_data'] = {}
        dictionary['pca_data']['loadings'] = scv.read("./outputs/seurat_dat/seur_pca_loadings.csv")
        dictionary['pca_data']['variance'] = scv.read("./outputs/seurat_dat/seur_pca_var.csv")
    except OSError as err:
        # BUG FIX: "ERROR: " + err raised TypeError (str + OSError);
        # convert the exception to a string before concatenating.
        print("ERROR: " + str(err))
        print("Proceeding without PCA variance or loadings")
    return dictionary
示例#7
0
def plot_velocity(script_pkl, loom_path, components='1,2'):
    """
    Compute and plot RNA velocity on the PCA embedding.

    Parameters
    ----------
    script_pkl : dict
        Must contain "expression_table", "PC_expression" and "annotation"
        (pandas objects indexed by cell identifier).
    loom_path : str
        Path to the velocyto loom file with spliced/unspliced counts.
    components : str, optional
        Components to plot, e.g. '1,2' (passed to scvelo).
    """
    data = script_pkl["expression_table"]
    pca = script_pkl["PC_expression"]
    meta_data = script_pkl["annotation"]

    # Harmonise cell identifiers across all three tables.
    data.index = rename_shl(data.index)
    pca.index = rename_shl(pca.index)
    meta_data.index = rename_shl(meta_data.index)

    adata = anndata.AnnData(data, meta_data)

    # Create an analysis object from the loom file.
    # BUG FIX: the original ignored `loom_path` and read a hard-coded file.
    adata_loom = scv.read(loom_path, cache=True)

    # Keep only cells present in both objects, in deterministic sorted order.
    retained_cells = sorted(set(adata_loom.obs.index) & set(adata.obs.index))
    adata_loom = adata_loom[retained_cells, :]

    adata_loom.var_names_make_unique()

    # plot proportions spliced/unspliced
    # scv.pl.proportions(adata_loom)

    # preprocess
    scv.pp.filter_and_normalize(adata_loom,
                                min_shared_counts=20,
                                n_top_genes=2000)
    scv.pp.moments(adata_loom, n_pcs=30, n_neighbors=30)

    # Overwrite the first 20 PCs with the externally computed PCA.
    # NOTE(review): assumes `pca` has 20 columns and rows aligned with
    # retained_cells — TODO confirm against the caller.
    adata_loom.obsm['X_pca'][:, 0:20] = pca

    scv.tl.velocity(adata_loom)
    scv.tl.velocity_graph(adata_loom)

    scv.pl.velocity_embedding(adata_loom, basis='pca', components=components)
示例#8
0
import scvelo as scv
import pandas as pd
import os
import numpy as np

# Script: rebuild an AnnData with spliced/unspliced layers from CSV matrices
# exported by a Seurat integration, then compute moments for velocity.
os.chdir(
    r"C:/Users/USER/Documents/R/RNAseq/scientificProject/data/Scadden/Magic_based"
)
seur_loc = r"C:/Users/USER/Documents/R/RNAseq/scientificProject/data/Scadden/R_references/Seurat_integration/"

# Gene names come from the index (first column) of the spliced CSV.
namez = pd.read_csv(seur_loc + 'spliced_seur.csv', index_col=0).index
namez = namez.to_list()

# Read the spliced matrix, then transpose it.
# NOTE(review): the transpose implies the CSV is stored genes x cells and
# AnnData wants cells x genes — TODO confirm against the exporting R script.
s = scv.read(seur_loc + 'spliced_seur.csv',
             cache=True,
             first_column_names=True,
             index_col=0)
s = s.transpose()
# Same for the unspliced matrix (note: no index_col here, unlike above).
u = scv.read(seur_loc + 'unspliced_seur.csv',
             cache=True,
             first_column_names=True)
u = u.transpose()

# Use the spliced object as the main AnnData and attach both count layers.
adata = s
adata.layers['spliced'] = s.X
adata.layers['unspliced'] = u.X
adata.var_names = namez
# First/second-order moments over a 15-neighbour graph (velocity prerequisite).
scv.pp.moments(adata, n_neighbors=15)
s = scv.read(seur_loc + 'spliced_magic.csv',
             cache=True,
示例#9
0
import numpy as np
import pandas as pd


# NOTE(review): `scv` (scvelo) is used below but not imported in this chunk;
# presumably imported elsewhere in the full script — verify.
scv.settings.set_figure_params('scvelo')

working_dir = "/home/kwells4/mTEC_dev/mtec_snakemake/"

# Input: Seurat cell metadata (CSV) and a velocyto loom file; outputs are PDFs.
seurat_cells = working_dir + "not_yet_included/seurat_info_new.csv"
output = working_dir + "/figure_output/figure_2d_new2.pdf"
input_loom = working_dir + "not_yet_included/wt_velocyto.loom"
seurat_df = pd.read_csv(seurat_cells, index_col = 0)
supplement_output = working_dir +"/figure_output/supplemental_scvelo_oct_2019.pdf"

# Keep only the cells listed in the Seurat metadata.
seurat_cell_list = list(seurat_df.index)
adata = scv.read(input_loom, sparse = True, cache = True)

adata.var_names_make_unique()
adata = adata[adata.obs.index.isin(seurat_cell_list)]
scv.utils.show_proportions(adata)
# Drop unneeded annotation to slim the object before preprocessing.
scv.utils.cleanup(adata, clean='all')



# Standard scvelo pipeline: filter/normalize, moments, velocity, graph.
scv.pp.filter_and_normalize(adata, min_counts=20, min_counts_u=10, n_top_genes=3000)

scv.pp.moments(adata, n_pcs=30, n_neighbors=30)

scv.tl.velocity(adata)

scv.tl.velocity_graph(adata)
示例#10
0
# NOTE(review): `adata`, `metadata` and `umap_cord` are defined earlier in the
# full script; this chunk only annotates and merges an existing object.
adata.obs["batch_group"] = metadata["batch_group"].values
# Third column of the UMAP coordinate table holds the cluster label
# (presumably — TODO confirm against the file that built umap_cord).
adata.obs["cluster"] = umap_cord.iloc[:,2].values

# Add features (VAR)
# Only the row index (feature names) of the counts table is used.
feat = pd.read_csv("../data/data_RNAvelocity/processed_counts.txt", sep= " ")
feat = feat.index
feat = pd.DataFrame(feat)
feat = pd.DataFrame({"feature" : feat.iloc[:,0]})
adata.var["features"] = feat.values


# %%


# Read LOOM file
ldata = scv.read("../data/data_RNAvelocity/s_un_am_allgenes.loom", cache=True)
# Merge adata with loom file (adds spliced/unspliced layers from the loom)
adata = scv.utils.merge(adata, ldata)


# %%


#we compute the first- and second-order moments (basically means and variances) for velocity estimation:
scv.pp.moments(adata)


# %%


# Estimates of velocity
示例#11
0
    # NOTE(review): this chunk is the interior of a larger function; `tab`,
    # `data_path` and `pp` are defined outside the visible lines.
    # Map each cell type to its signature genes from the annotation table.
    signatures = dict([(ct, tab.gene.values[tab.cell_type_epi_custom == ct])
                       for ct in pd.unique(tab.cell_type_epi_custom)])

    # Condition letters and donor labels used to name the demuxed input files.
    letters = ['C', 'E', 'W']
    donors = ['NCO', 'p009ot', 'p013ot']
    scv.settings.figdir = '/fast/work/users/peidlis_c/sodar_patient_organoid_data/figures'
    # Note: Never analyse NCO and patient tumors together.

    panel_genes = ['FABP1', 'PHGR1', 'TFF3', 'MKI67', 'CD44']

    # prep and save data
    recalc = True
    if recalc:
        # Split up conditions:
        for letter in letters:
            cdata = scv.read(data_path + 'NB_AS_' + letter +
                             '/demuxed/NB_AS_' + letter + '_demuxed.h5')
            for donor in donors:
                print(letter, donor)
                # Subset to one donor (SNP-demultiplexing label), then QC-filter.
                adata = cdata[cdata.obs['SNPdemux'] == donor].copy()
                adata.var_names_make_unique()
                adata = pp(adata, min_counts=5000, max_perc_mito=.25)
                # ct_annotate(adata, signatures, rescore=True)
                # Persist the per-condition, per-donor object for later steps.
                adata.write(data_path + 'NB_AS_' + letter +
                            '/processed/NB_AS_' + letter + '_' + donor + '.h5')

        # Aggregate conditions:
        for donor in donors:
            adata = None
            for letter in letters:
                dat = scv.read(data_path + 'NB_AS_' + letter +
                               '/demuxed/NB_AS_' + letter + '_demuxed.h5')
示例#12
0
                    type=str,
                    help="reduced dimension 1")
parser.add_argument("--rdim2",
                    default="UMAP2",
                    type=str,
                    help="reduced dimension 2")

args = parser.parse_args()

# ########################################################################### #
# ######################## Initialise AnnData ############################### #
# ########################################################################### #

# Two input modes: a velocyto loom file, or a dropEst output directory.
if not args.loom == "none":

    adata = scv.read(args.loom)
    # get directory with metadata + barcodes
    metadata_dir = args.rdims.split("/")[0]

elif not args.dropest_dir == "none":

    # dropEst writes separate exon / intron / exon-intron-spanning matrices.
    exon_matrix = os.path.join(args.dropest_dir, "exons.mtx.gz")
    intron_matrix = os.path.join(args.dropest_dir, "introns.mtx.gz")
    spanning_matrix = os.path.join(args.dropest_dir, "spanning.mtx.gz")

    # NOTE(review): the transpose implies the .mtx files are genes x cells;
    # converted to cells x genes CSR for AnnData — TODO confirm.
    exons = io.mmread(exon_matrix).transpose().tocsr()
    introns = io.mmread(intron_matrix).transpose().tocsr()
    spanning = io.mmread(spanning_matrix).transpose().tocsr()

    # Exonic counts become the main matrix and the 'spliced' layer.
    adata = ad.AnnData(X=exons)
    adata.layers["spliced"] = adata.X
示例#13
0
# Part 2 of calculating velocity
# Import loom files created by merging pulps 2,3,13 and 14 in Seurat
# Import umap embeddings from seurat to visualize velocity in the same umap coordinates

import scvelo as scv
import os.path

# -----------------------------------------------------------------------------------
# Open loom files from merged Pulp 2,3,13,14
# I ran python locally so make sure to check for correct paths
pulp_sdata = scv.read('/Users/delaura/Documents/Tooth/AllPulp/merged/sf.loom',
                      cache=True)
pulp_ndata = scv.read('/Users/delaura/Documents/Tooth/AllPulp/merged/uf.loom',
                      cache=True)
# Attach sf.loom counts as 'spliced' and uf.loom counts as 'unspliced'
# on a single object.
pulp_sdata.layers['spliced'] = pulp_sdata.X
pulp_sdata.layers['unspliced'] = pulp_ndata.X
# Bare expression: only displays the object in an interactive session.
pulp_sdata

scv.utils.show_proportions(pulp_sdata)

# import umap embeddings from R (Seurat) and add to anndata object
data_folder = "/Users/delaura/Documents/Tooth/AllPulp/merged"
# import harmony umap embedding saved from Seurat object
obsm = scv.read_csv(os.path.join(data_folder, "embednonames.csv"))
# NOTE(review): assumes the CSV rows are in the same order as the loom cells;
# no index alignment is performed — verify upstream export.
pulp_sdata.obsm["X_umap"] = obsm.values
scv.pp.filter_and_normalize(
    pulp_sdata, min_shared_counts=30,
    n_top_genes=2000)  # don't run if using seurat pre-filtered data

scv.pp.moments(pulp_sdata, n_pcs=30, n_neighbors=30)
# scv.tl.umap(pulp_sdata) # if re-doing umap within scvelo
示例#14
0
def RunSCVELO(adata=None,
              h5ad=None,
              group_by=None,
              liner_reduction=None,
              nonliner_reduction=None,
              dirpath="./",
              fileprefix="",
              dpi=300,
              min_shared_counts=30,
              n_pcs=30,
              n_neighbors=30,
              approx=True,
              stream_smooth=0.3,
              stream_density=1.2,
              arrow_density=0.05,
              arrow_length=15,
              arrow_size=15,
              paga_threshold=0.15,
              calculate_velocity_genes=False,
              velocity_genes_min_corr=0.3,
              velocity_ngenes=100,
              s_genes=None,
              g2m_genes=None,
              recover_dynamics=False,
              n_jobs=12,
              velocity_with_noise=False,
              calculate_dynamical_genes=False,
              dynamical_ngenes=100,
              diff_kinetics=False):
    import matplotlib.pyplot as plt
    import random
    random.seed(11)
    import scvelo as scv
    import pandas as pd
    import os
    prevdir = os.getcwd()
    os.chdir(os.path.expanduser(dirpath))

    try:
        if adata is None and h5ad is None:
            print("adata or h5ad must be provided.")
            exit()
        if group_by is None or liner_reduction is None or nonliner_reduction is None:
            print(
                "group_by, liner_reduction and nonliner_reduction must be all provided."
            )
            exit()
        if adata is None:
            adata = scv.read(h5ad)
        del adata.uns

        liner_reduction = "X_" + liner_reduction
        nonliner_reduction = "X_" + nonliner_reduction
        adata.obs[group_by] = adata.obs[group_by].astype(dtype="category")

        scv.pp.filter_and_normalize(adata, min_shared_counts=min_shared_counts)
        scv.pp.moments(adata,
                       n_pcs=n_pcs,
                       use_rep=liner_reduction,
                       n_neighbors=n_neighbors)

        scv.tl.velocity(adata, vkey="stochastic")
        scv.tl.velocity_graph(adata,
                              vkey="stochastic",
                              n_neighbors=n_neighbors,
                              approx=approx)
        scv.pl.velocity_embedding_stream(adata,
                                         title="stochastic",
                                         basis=nonliner_reduction,
                                         vkey=["stochastic"],
                                         color=group_by,
                                         smooth=stream_smooth,
                                         density=stream_density,
                                         save=False,
                                         show=True)
        plt.savefig('.'.join(
            filter(None, [fileprefix, "stochastic_stream.png"])),
                    dpi=dpi)
        scv.pl.velocity_embedding(adata,
                                  title="stochastic",
                                  basis=nonliner_reduction,
                                  vkey=["stochastic"],
                                  color=group_by,
                                  size=20,
                                  arrow_length=arrow_length,
                                  arrow_size=arrow_size,
                                  density=arrow_density,
                                  save=False,
                                  show=True)
        plt.savefig('.'.join(filter(None,
                                    [fileprefix, "stochastic_arrow.png"])),
                    dpi=dpi)

        scv.tl.velocity_confidence(adata, vkey="stochastic")
        scv.tl.velocity_pseudotime(adata, vkey="stochastic")
        scv.pl.scatter(adata,
                       basis=nonliner_reduction,
                       color=('stochastic_length', 'stochastic_confidence'),
                       cmap='coolwarm',
                       perc=[5, 95],
                       save=False,
                       show=True)
        plt.savefig('.'.join(
            filter(None, [fileprefix, "stochastic_length_confidence.png"])),
                    dpi=dpi)
        scv.pl.scatter(adata,
                       basis=nonliner_reduction,
                       color='stochastic_pseudotime',
                       cmap='gnuplot',
                       save=False,
                       show=True)
        plt.savefig('.'.join(
            filter(None, [fileprefix, "stochastic_pseudotime.png"])),
                    dpi=dpi)

        adata.uns['neighbors']['distances'] = adata.obsp['distances']
        adata.uns['neighbors']['connectivities'] = adata.obsp['connectivities']
        scv.tl.paga(adata, groups=group_by, vkey="stochastic")
        scv.pl.paga(adata,
                    basis=nonliner_reduction[2:],
                    threshold=paga_threshold,
                    size=50,
                    alpha=0.02,
                    min_edge_width=2,
                    node_size_scale=1.5,
                    save=False,
                    show=True)
        plt.savefig('.'.join(filter(None,
                                    [fileprefix, "stochastic_paga.png"])),
                    dpi=dpi)

        if calculate_velocity_genes is True:
            scv.tl.rank_velocity_genes(adata,
                                       vkey="stochastic",
                                       groupby=group_by,
                                       min_corr=velocity_genes_min_corr,
                                       n_genes=velocity_ngenes)
            df = scv.DataFrame(adata.uns['rank_velocity_genes']['names'])
            for cluster in df.columns:
                #df[0:1].values.ravel()[:12] ### by row
                scv.pl.scatter(adata,
                               color=group_by,
                               basis=df[cluster].values[:6],
                               size=20,
                               linewidth=2,
                               alpha=1,
                               ylabel="cluster: " + cluster + "\nunspliced",
                               add_linfit=True,
                               add_rug=True,
                               add_outline=True,
                               ncols=3,
                               frameon=True,
                               save=False,
                               show=False)
                plt.savefig('.'.join(
                    filter(None,
                           [fileprefix, cluster, "stochastic_genes1.png"])),
                            dpi=dpi)
                scv.pl.velocity(adata,
                                color=group_by,
                                var_names=df[cluster].values[:6],
                                size=10,
                                linewidth=2,
                                alpha=1,
                                ylabel="cluster: " + cluster + "\nunspliced",
                                add_outline=True,
                                basis=nonliner_reduction,
                                color_map=["Spectral", "YlOrRd"],
                                ncols=2,
                                save=False,
                                show=False)
                plt.savefig('.'.join(
                    filter(None,
                           [fileprefix, cluster, "stochastic_genes2.png"])),
                            dpi=dpi)

        if s_genes is not None and g2m_genes is not None:
            scv.tl.score_genes_cell_cycle(adata,
                                          s_genes=s_genes,
                                          g2m_genes=g2m_genes)
            scv.pl.scatter(adata,
                           basis=nonliner_reduction,
                           color=('S_score', 'G2M_score'),
                           smooth=True,
                           perc=[5, 95],
                           save=False,
                           show=True)
            plt.savefig('.'.join(
                filter(None, [fileprefix, "stochastic_cellcycle.png"])),
                        dpi=dpi)

        if recover_dynamics is True or diff_kinetics is True or velocity_with_noise is True:
            adata2 = adata[:, adata.var['stochastic_genes']].copy()
            Ms = adata2.layers["Ms"]
            Mu = adata2.layers["Mu"]
            spliced = adata2.layers["spliced"]
            unspliced = adata2.layers["unspliced"]
            stochastic = adata2.layers["stochastic"]
            variance_stochastic = adata2.layers["variance_stochastic"]
            adata2.layers.clear()
            adata2.layers["Ms"] = Ms
            adata2.layers["Mu"] = Mu
            connectivities = adata2.obsp["connectivities"]
            distances = adata2.obsp["distances"]
            adata2.obsp.clear()
            adata2.obsp["connectivities"] = connectivities

            scv.tl.recover_dynamics(adata2,
                                    var_names='stochastic_genes',
                                    use_raw=False,
                                    n_jobs=n_jobs)
            adata2.obsp["distances"] = distances
            adata2.layers["spliced"] = spliced
            adata2.layers["unspliced"] = unspliced
            adata2.layers["stochastic"] = stochastic
            adata2.layers["variance_stochastic"] = variance_stochastic
            scv.tl.velocity(adata2, mode="dynamical", vkey="dynamical")
            scv.tl.velocity_graph(adata2,
                                  vkey="dynamical",
                                  n_neighbors=n_neighbors,
                                  approx=approx)
            scv.pl.velocity_embedding_stream(adata2,
                                             title="dynamical",
                                             basis=nonliner_reduction,
                                             vkey=["dynamical"],
                                             color=group_by,
                                             smooth=stream_smooth,
                                             density=stream_density,
                                             save=False,
                                             show=True)
            plt.savefig('.'.join(
                filter(None, [fileprefix, "dynamical_stream.png"])),
                        dpi=dpi)
            scv.pl.velocity_embedding(adata2,
                                      title="dynamical",
                                      basis=nonliner_reduction,
                                      vkey=["dynamical"],
                                      color=group_by,
                                      size=20,
                                      arrow_length=arrow_length,
                                      arrow_size=arrow_size,
                                      density=arrow_density,
                                      save=False,
                                      show=True)
            plt.savefig('.'.join(
                filter(None, [fileprefix, "dynamical_arrow.png"])),
                        dpi=dpi)

            scv.tl.velocity_confidence(adata2, vkey="dynamical")
            scv.tl.velocity_pseudotime(adata2, vkey="dynamical")
            scv.pl.scatter(adata2,
                           basis=nonliner_reduction,
                           color=('dynamical_length', 'dynamical_confidence'),
                           cmap='coolwarm',
                           perc=[5, 95],
                           save=False,
                           show=True)
            plt.savefig('.'.join(
                filter(None, [fileprefix, "dynamical_length_confidence.png"])),
                        dpi=dpi)
            scv.pl.scatter(adata2,
                           basis=nonliner_reduction,
                           color='dynamical_pseudotime',
                           cmap='gnuplot',
                           save=False,
                           show=True)
            plt.savefig('.'.join(
                filter(None, [fileprefix, "dynamical_pseudotime.png"])),
                        dpi=dpi)

            scv.tl.latent_time(adata2, vkey="dynamical")
            scv.pl.scatter(adata2,
                           basis=nonliner_reduction,
                           color='latent_time',
                           color_map='gnuplot',
                           save=False,
                           show=True)
            plt.savefig('.'.join(
                filter(None, [fileprefix, "dynamical_latent_time.png"])),
                        dpi=dpi)

            if calculate_dynamical_genes is True:
                scv.tl.rank_dynamical_genes(adata2,
                                            groupby=group_by,
                                            n_genes=dynamical_ngenes)
                df = scv.DataFrame(adata2.uns['rank_dynamical_genes']['names'])
                for cluster in df.columns:
                    #df[0:1].values.ravel()[:12] ### by row
                    scv.pl.scatter(adata,
                                   color=group_by,
                                   basis=df[cluster].values[:6],
                                   size=20,
                                   linewidth=2,
                                   alpha=1,
                                   ylabel="cluster: " + cluster +
                                   "\nunspliced",
                                   add_linfit=True,
                                   add_rug=True,
                                   add_outline=True,
                                   ncols=3,
                                   frameon=True,
                                   save=False,
                                   show=False)
                    plt.savefig('.'.join(
                        filter(None,
                               [fileprefix, cluster, "dynamical_genes1.png"])),
                                dpi=dpi)
                    scv.pl.velocity(adata,
                                    color=group_by,
                                    var_names=df[cluster].values[:6],
                                    size=10,
                                    linewidth=2,
                                    alpha=1,
                                    ylabel="cluster: " + cluster +
                                    "\nunspliced",
                                    add_outline=True,
                                    basis=nonliner_reduction,
                                    color_map=["Spectral", "YlOrRd"],
                                    ncols=2,
                                    save=False,
                                    show=False)
                    plt.savefig('.'.join(
                        filter(None,
                               [fileprefix, cluster, "dynamical_genes2.png"])),
                                dpi=dpi)

            if diff_kinetics is True:
                top_genes = adata2.var['fit_likelihood'].sort_values(
                    ascending=False).index[:100]
                scv.tl.differential_kinetic_test(adata2,
                                                 var_names=top_genes,
                                                 groupby=group_by)
                scv.tl.velocity(adata2,
                                mode="dynamical",
                                vkey="dynamical_kinetics",
                                diff_kinetics=True)
                scv.tl.velocity_graph(adata2,
                                      vkey="dynamical_kinetics",
                                      n_neighbors=n_neighbors,
                                      approx=approx)
                scv.pl.velocity_embedding_stream(adata2,
                                                 title="dynamical_kinetics",
                                                 basis=nonliner_reduction,
                                                 vkey=["dynamical_kinetics"],
                                                 color=group_by,
                                                 smooth=stream_smooth,
                                                 density=stream_density,
                                                 save=False,
                                                 show=True)
                plt.savefig('.'.join(
                    filter(None,
                           [fileprefix, "dynamical_kinetics_stream.png"])),
                            dpi=dpi)
                scv.pl.velocity_embedding(adata2,
                                          title="dynamical_kinetics",
                                          basis=nonliner_reduction,
                                          vkey=["dynamical_kinetics"],
                                          color=group_by,
                                          size=20,
                                          arrow_length=arrow_length,
                                          arrow_size=arrow_size,
                                          density=arrow_density,
                                          save=False,
                                          show=True)
                plt.savefig('.'.join(
                    filter(None,
                           [fileprefix, "dynamical_kinetics_arrow.png"])),
                            dpi=dpi)
                scv.tl.velocity(adata2,
                                mode="stochastic",
                                vkey="stochastic_kinetics",
                                diff_kinetics=True)
                scv.tl.velocity_graph(adata2,
                                      vkey="stochastic_kinetics",
                                      n_neighbors=n_neighbors,
                                      approx=approx)
                scv.pl.velocity_embedding_stream(adata2,
                                                 title="stochastic_kinetics",
                                                 basis=nonliner_reduction,
                                                 vkey=["stochastic_kinetics"],
                                                 color=group_by,
                                                 smooth=stream_smooth,
                                                 density=stream_density,
                                                 save=False,
                                                 show=True)
                plt.savefig('.'.join(
                    filter(None,
                           [fileprefix, "stochastic_kinetics_stream.png"])),
                            dpi=dpi)
                scv.pl.velocity_embedding(adata2,
                                          title="stochastic_kinetics",
                                          basis=nonliner_reduction,
                                          vkey=["stochastic_kinetics"],
                                          color=group_by,
                                          size=20,
                                          arrow_length=arrow_length,
                                          arrow_size=arrow_size,
                                          density=arrow_density,
                                          save=False,
                                          show=True)
                plt.savefig('.'.join(
                    filter(None,
                           [fileprefix, "stochastic_kinetics_arrow.png"])),
                            dpi=dpi)

            if velocity_with_noise is True:
                import numpy as np
                top_genes = adata2.var['fit_likelihood'].sort_values(
                    ascending=False).index[:3]
                adata2.layers['dynamical_with_noise'] = adata2.layers['dynamical'] + \
                    np.random.normal(
                    adata2.layers['dynamical'], scale=adata2.layers['Ms'].std(0))
                scv.tl.velocity_graph(adata2,
                                      gene_subset=top_genes,
                                      vkey='dynamical_with_noise')
                scv.tl.velocity_embedding(adata2,
                                          basis=nonliner_reduction[2:],
                                          vkey='dynamical_with_noise',
                                          autoscale=False)
                scv.pl.velocity_embedding_stream(adata2,
                                                 title="dynamical_with_noise",
                                                 basis=nonliner_reduction,
                                                 vkey=["dynamical_with_noise"],
                                                 color=group_by,
                                                 smooth=stream_smooth,
                                                 density=stream_density,
                                                 save=False,
                                                 show=True)
                plt.savefig('.'.join(
                    filter(None,
                           [fileprefix, "dynamical_with_noise_stream.png"])),
                            dpi=dpi)
                scv.pl.velocity_embedding(adata2,
                                          title="dynamical_with_noise",
                                          basis=nonliner_reduction,
                                          vkey=["dynamical_with_noise"],
                                          color=group_by,
                                          size=20,
                                          arrow_length=arrow_length,
                                          arrow_size=arrow_size,
                                          density=arrow_density,
                                          save=False,
                                          show=True)
                plt.savefig('.'.join(
                    filter(None,
                           [fileprefix, "dynamical_with_noise_arrow.png"])),
                            dpi=dpi)

            import os
            adata2.write('.'.join(filter(None,
                                         [fileprefix, "dynamical.h5ad"])),
                         compression='gzip')

    finally:
        os.chdir(prevdir)

    try:
        adata.__dict__['_raw'].__dict__['_var'] = adata.__dict__[
            '_raw'].__dict__['_var'].rename(columns={'_index': 'features'})
    except:
        pass

    return adata
示例#15
0
        n1 = ns.n_atlas
        n = ds.n_samples
        knn = {'row': [], 'col': [], 'val': []}
        for e in ns.graph.es:
            v1, v2 = e.source - n1, e.target - n1
            if (v1 > 0) and (v2 > 0):
                knn['row'].append(v1)
                knn['col'].append(v2)
                knn['val'].append(1)
        knn = sp.sparse.coo_matrix((knn['val'], (knn['row'], knn['col'])),
                                   shape=(n, n),
                                   dtype=int)

        import scvelo as scv
        fn_velocity = '../../data/sequencing/me1/velocity_me1.loom'
        adata = scv.read(fn_velocity, cache=True)
        adata.obs.index = adata.obs.index.str.slice(4, -1) + '-1'
        adata.var_names_make_unique()

        scv.pp.filter_and_normalize(adata,
                                    min_shared_counts=20,
                                    n_top_genes=2000)
        scv.pp.moments(adata, n_pcs=25, n_neighbors=10)

        scv.tl.velocity(adata)
        scv.tl.velocity_graph(adata)

        ds.query_samples_by_name(adata.obs_names, inplace=True)
        ds.samplesheet['northstar_assignment'] = dsme.samplesheet.loc[
            ds.samplenames, 'northstar_assignment']
        adata.obsm['X_umap'] = ds.samplesheet.loc[adata.obs_names,
示例#16
0
import pandas as pd
import scanpy as sc
import scvelo as scv

# Global scanpy/scvelo settings: verbose logging and consistent figure style.
sc.settings.verbosity = 3
sc.logging.print_header()
sc.settings.set_figure_params(dpi=80, facecolor='white')
scv.logging.print_version()
scv.settings.verbosity = 3  # show errors(0), warnings(1), info(2), hints(3)
scv.settings.presenter_view = True  # set max width size for presenter view
scv.settings.set_figure_params('scvelo')  # for beautified visualization
results_file = 'results_HL.h5ad'

# Load the 10x count matrix with gene symbols as variable names.
adata_r = sc.read_10x_mtx('/Users/bahawarsdhillon/Desktop/BIO 257 - Applied Genomics/Scanpy-Project/filtered_feature_bc_matrix/', var_names='gene_symbols',cache=False)
adata_r.var_names_make_unique()

# Spliced/unspliced layers produced by velocyto.
velocity = scv.read("/Users/bahawarsdhillon/Desktop/BIO 257 - Applied Genomics/Scanpy-Project/Parent_NGSC3_DI_HodgkinsLymphoma_possorted_genome_bam_JLA4X.loom")

# Merge the velocity layers into the expression object.
# FIX: a redundant `adata = scv.utils.merge(adata_r, velocity)` was removed
# here - its result was never read before `adata` is reassigned below.
adata_r = scv.utils.merge(adata_r, velocity)

# Basic QC filtering: minimum genes/cells plus upper caps on counts and genes.
sc.pp.filter_cells(adata_r, min_genes=200)
sc.pp.filter_genes(adata_r, min_cells=3)
sc.pp.filter_cells(adata_r, max_counts=39766)
sc.pp.filter_cells(adata_r, max_genes=5942)
adata_r.var['mt'] = adata_r.var_names.str.startswith('MT-')  # annotate the group of mitochondrial genes as 'mt'
sc.pp.calculate_qc_metrics(adata_r, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)
# Drop cells with >=10% mitochondrial counts (likely stressed/dying cells).
adata = adata_r[adata_r.obs.pct_counts_mt < 10, :]

# Library-size normalization and log transform.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
示例#17
0
def main(argv):
    """Seurocity entry point: extract Seurat data via R, merge it with
    velocyto loom files, and write per-sample and combined loom outputs.

    Parameters
    ----------
    argv : list
        Command-line arguments (typically sys.argv[1:]). Supported options:
        -h (help), -l (license), -r <dir> (Rscript location),
        -w <dir> (working directory), -i <file> (input .rds file name).
    """
    print("SEUROCITY v1.0.0, (c) 2020 Richard A. Guyer, MD, PhD\n")

    # defaults: Rscript found on PATH, standard input file name
    rscript_dir = None
    input_file = "input.rds"

    # handle arguments to adjust default settings
    # FIX: the option string previously was "hlr:w:" with no "i:", so
    # "-i <file>" always raised GetoptError and the -i branch was unreachable.
    try:
        opts, args = getopt.getopt(argv, "hlr:w:i:")
    except getopt.GetoptError:
        arg_error()
        sys.exit(1)
    for opt, arg in opts:
        if opt == '-h':
            display_help()
            sys.exit(0)
        elif opt == '-l':  # FIX: was `opt in ("-l")` (substring test on a str)
            display_license()
            sys.exit(0)
        elif opt == '-r':
            rscript_dir = arg
        elif opt == '-w':
            os.chdir(arg)
        elif opt == '-i':
            input_file = arg

    working_dir = os.getcwd() + "/"
    input_dir = working_dir + "inputs/"
    output_dir = working_dir + "outputs/"

    # check for files required by extractSeurat.R, run if all are present
    required_files_for_R = [input_dir + input_file,
                            input_dir + "idents.txt",
                            input_dir + "reductions.txt",
                            input_dir + "append.txt",
                            working_dir + "extractSeurat.R"]
    if not files_exist(required_files_for_R):
        print("ERROR: Critical files not found in expected locations")
        print("Please ensure proper input file structure")
        print("For help: python Seurocity.py -h")
        print("")
        sys.exit(1)
    else:
        run_rscript(working_dir + "extractSeurat.R", [input_file, "idents.txt", "reductions.txt",
                                                      "append.txt"], rscript_path=rscript_dir)

    # get sample IDs and reductions output by Rscript
    sample_ids = get_ids()
    reductions = get_reductions()

    # check whether expected loom files exist
    expected_looms = [input_dir + ident + ".loom" for ident in sample_ids]
    if not files_exist(expected_looms):
        print("ERROR: Expected loom files not found in ./inputs")
        print("Please ensure proper input file structure")
        print("For help: python Seurocity.py -h")
        print("")
        sys.exit(1)
    else:
        print("\nLoading files and processing AnnData objects, this may take a few minutes")
        samples = load_samples(sample_ids)
        samples = load_pca_data(samples)
        samples = import_seur_data(samples, sample_ids, reductions)

        # ensure every sample has the same list of genes
        if len(sample_ids) > 1:
            samples = same_genes(samples, sample_ids)

        # save main AnnData object for each sample as a loom file
        comment = "\nSaving main AnnData for each sample as loom files"
        print(comment)
        if os.path.exists(output_dir + "proc_loom"):
            comment = "- WARNING: ./outputs/proc_loom/ exists, files may be overwritten"
            print(comment)
        else:
            os.mkdir(output_dir + "proc_loom")

        for s in sample_ids:
            savename = output_dir + "proc_loom/" + s + "_proc.loom"
            comment = "- Saving sample " + s + " to: " + savename
            print(comment)
            # currently an ArrayView; must be a real ndarray for write_loom
            samples[s]['main'].varm['PCs'] = np.asarray(samples[s]['main'].varm['PCs'])
            samples[s]['main'].write_loom(savename, write_obsm_varm = True)

        # remove samples to clean up memory
        del(samples)

        # generate combined loom file and import pca data
        #   generate combined file
        comment = "\nGenerating combined loom file with PCA data loaded"
        if os.path.exists(output_dir + "comb_loom"):
            comment = "- WARNING: ./outputs/comb_loom/ exists, combined.loom will be overwritten if it already exists"
            print(comment)
        else:
            os.mkdir(output_dir + "comb_loom")

        processed_files = os.listdir(output_dir + "proc_loom/")
        processed_files = [output_dir + "proc_loom/" + p for p in processed_files]
        lp.combine(processed_files, output_dir + "comb_loom/combined.loom")

        #   load combined loom and pca data files
        combined = scv.read(output_dir + "comb_loom/combined.loom", cache = False)
        pca_var = scv.read(output_dir + "seurat_dat/seur_pca_var.csv")
        pca_load = scv.read(output_dir + "seurat_dat/seur_pca_loadings.csv")

        #   variance data
        combined.uns['pca'] = {}
        combined.uns['pca'][pca_var.obs.index[0]] = pca_var.X[0]
        combined.uns['pca'][pca_var.obs.index[1]] = pca_var.X[1]

        #   pca loadings, aligned to the combined object's gene order
        genes = combined.var.index.tolist()
        combined.varm['PCs'] = np.asarray(pca_load[genes,:].X)

        #   save combined, now containing pca loadings and variance data
        combined.write_loom(output_dir + "comb_loom/combined.loom", write_obsm_varm = True)
示例#18
0
import os
import re
import sys
from pathlib import Path

import numpy as np
import pandas as pd  # FIX: pd is used below but was never imported
import scipy

import scanpy as sc  # FIX: sc is used by read_counts_and_phases below but was never imported
import scvelo as scv  # FIX: scv is used below but was never imported

# Work inside the Seurat-based output directory.
os.chdir(
    r"C:/Users/USER/Documents/R/RNAseq/scientificProject/data/Scadden/Seurat_based"
)
seur_loc = r"C:/Users/USER/Documents/R/RNAseq/scientificProject/data/Scadden/R_references/Seurat_integration/"

# Gene names come from the index of the exported spliced matrix.
namez = pd.read_csv(seur_loc + 'spliced_seur.csv', index_col=0).index
namez = namez.to_list()

# Matrices are exported genes x cells; transpose to AnnData's cells x genes.
s = scv.read(seur_loc + 'spliced_seur.csv',
             cache=True,
             first_column_names=True,
             index_col=0)
s = s.transpose()
u = scv.read(seur_loc + 'unspliced_seur.csv',
             cache=True,
             first_column_names=True)
u = u.transpose()

adata = s
adata.layers['spliced'] = s.X
adata.layers['unspliced'] = u.X

# Seurat exports 1-based neighbor indices; shift to 0-based for Python.
n = pd.read_csv(seur_loc + 'neighbors_seur.csv', index_col=0)
n = n - 1
# FIX: was `adata.uns.neighbors = n`, which raises AttributeError because
# .uns is a mapping, not an attribute namespace.
# NOTE(review): scvelo usually expects its own neighbor dict structure here -
# confirm how downstream code consumes this entry.
adata.uns['neighbors'] = n
adata.var_names = namez
def read_counts_and_phases(count_or_rpkm, use_spike_ins, biotype_to_use, u_plates, use_isoforms=False, load_velocities=False):
    '''
    Read RNA data into scanpy; read cell-cycle phases and FACS intensities.

        - count_or_rpkm: Must be "Counts" or "Tpms"
        - use_spike_ins: if True, read the ERCC spike-in variant of the file
        - biotype_to_use: if non-empty, restrict genes to this biotype
          (a filtered csv is created on first use and cached on disk)
        - u_plates: iterable of plate identifiers to read FACS exports for
        - use_isoforms: read the isoform-level matrices instead of gene-level
        - load_velocities: also merge velocyto loom data into the result

    Returns (adata, phases): the annotated expression matrix and the
    DataFrame of FACS gate rows used to assign phases.
    '''
    read_file = f"input/RNAData/{count_or_rpkm}{'_Isoforms' if use_isoforms else ''}.csv" + (".ercc.csv" if use_spike_ins else "")
    # FIX: was `biotype_to_use != None` - identity comparison is the idiom.
    if biotype_to_use is not None and len(biotype_to_use) > 0:
        print(f"filtering for biotype: {biotype_to_use}")
        biotype_file = f"{read_file}.{biotype_to_use}.csv"
        if not os.path.exists(biotype_file):
            gene_info = pd.read_csv(f"input/RNAData/IdsToNames{'_Isoforms' if use_isoforms else ''}.csv.gz",
                                    index_col=False, header=None, names=["gene_id", "name", "biotype", "description"])
            biotyped = gene_info[gene_info["biotype"] == biotype_to_use]["gene_id"]
            pd.read_csv(read_file)[biotyped].to_csv(biotype_file, index=False)
        read_file = biotype_file

    adata = sc.read_csv(read_file)
    print(f"data shape: {adata.X.shape}")
    if load_velocities:
        adata.obs_names = pd.read_csv("input/RNAData/Tpms.obs_names.csv")["well_plate"]

    # Collect per-plate FACS exports, then concatenate once.
    # FIX: previously accumulated via DataFrame.append, removed in pandas 2.0.
    intensity_frames, phase_frames = [], []
    for plate in u_plates:
        file = f"input/RNAData/180911_Fucci_single cell seq_ss2-18-{plate}_index sort export.csv"
        plateIntensities = pd.read_csv(file, skiprows=2)
        # Columns 5 and 6 are the green/red channel means; give them clear names.
        newColumns = list(plateIntensities.columns)
        newColumns[5] = "MeanGreen530"
        newColumns[6] = "MeanRed585"
        plateIntensities.columns = newColumns
        plateIntensities["Plate"] = [plate] * len(plateIntensities)
        plateIntensities["Well_Plate"] = [f"{w}_{plate}" for w in plateIntensities["Well"]]
        # "All Events" rows carry the per-well intensity measurements.
        intensity_frames.append(plateIntensities[plateIntensities["Population"] == "All Events"])
        # Phase rows are gate populations (not All Events/Cells/Singlets) at 100% of total.
        isPhaseRow = ~plateIntensities["Population"].isin(["All Events", "Cells", "Singlets"])
        phase_frames.append(plateIntensities[isPhaseRow & (plateIntensities["% Total"] == "100.00%")])
    intensities = pd.concat(intensity_frames, ignore_index=True)
    phases = pd.concat(phase_frames, ignore_index=True)
    # Map each well to its gate population (the phase label).
    phases_lookup = dict(zip(phases["Well_Plate"], phases["Population"]))

    # Assign phases and log intensities; require log intensity
    intensities = intensities.sort_values(by="Well_Plate")
    adata.obs["Well_Plate"] = np.array(intensities["Well_Plate"])
    adata.obs["plate"] = np.array(intensities["Plate"])
    adata.obs["phase"] = np.array([phases_lookup.get(wp, "N/A") for wp in intensities["Well_Plate"]])
    adata.obs["MeanGreen530"] = np.array(intensities["MeanGreen530"])
    adata.obs["MeanRed585"] = np.array(intensities["MeanRed585"])
    adata = adata[pd.notnull(adata.obs["MeanGreen530"]) & pd.notnull(adata.obs["MeanRed585"])] # removes 6 dark likely mitotic cells

    # Read in fucci pseudotime from previous analysis
    if os.path.isfile("output/fucci_time.csv"):
        adata.obs["fucci_time"] = np.array(pd.read_csv("output/fucci_time.csv")["fucci_time"])

    # Get info about the genes
    gene_info = pd.read_csv(f"input/RNAData/IdsToNames{'_Isoforms' if use_isoforms else ''}.csv.gz",
                            header=None, names=["name", "biotype", "description"], index_col=0)
    adata.var["name"] = gene_info["name"]
    adata.var["biotype"] = gene_info["biotype"]
    adata.var["description"] = gene_info["description"]

    if load_velocities:
        # Merge velocyto layers; var_names switch to stable accession IDs first.
        ldata = scv.read("input/RNAData/a.loom", cache=True)
        ldata.obs_names = pd.read_csv("input/RNAData/a.obs_names.csv")["well_plate"]
        ldata.var["GeneName"] = ldata.var_names
        ldata.var_names = ldata.var["Accession"]
        adata = scv.utils.merge(adata, ldata, copy=True)

    return adata, phases
示例#20
0
# %%
import scvelo as scv
import pandas as pd
import numpy as np
# FIX: was `import matplotlib as plt`, which binds the matplotlib package
# (not pyplot) to the conventional `plt` name.
import matplotlib.pyplot as plt
# %%

sample_one = scv.read("../test/G328E2L2_scRNAseq_G328E2L3_CITEseq.loom",
                      cache=False)
# sample_one = sample_one.var_names_make_unique
# ....
# sample_n = anndata.read_loom("sample_n.loom")
# %%
# Cell IDs and cluster assignments exported from the Seurat analysis.
sample_obs = pd.read_csv("../test/cellID_obs.csv")
cell_clusters = pd.read_csv("../test/cell_clusters.csv")

#%%
# Rewrite velocyto barcodes ("sample:AAAC...x") to the Seurat convention
# ("AAAC...-1"). NOTE(review): .replace substitutes every lowercase "x" -
# safe only because barcodes themselves are uppercase ACGT.
sample_one.obs = sample_one.obs.rename(
    index=lambda x: x.split(":")[-1].replace("x", "-1"))
sample_one.obs.head()
# %%
# Keep only the cells that are present in the Seurat object.
sample_one = sample_one[np.isin(sample_one.obs.index, sample_obs["x"])]
sample_one.obs.head()
# %%
# Now that we have our Velocity file filtered based upon our Seurat object, we can go ahead and add UMAP coordinates. We'll first upload them:
umap = pd.read_csv("../test/cell_embeddings.csv")
#%%
# With the coordinates, we will need to make sure we add them so they match the order of the Cell IDs in our anndata object. Our Cell IDs are rownames in the observation layer of our object, so we can view them by using the following:

sample_one.obs.index
# Let's cast our index as a data frame and change the column name
sys.stderr.write("beginning scvelo!")
# Input/output paths are injected by Snakemake via the global `snakemake` object.
velocity_loom = snakemake.input.velocity_loom
seurat_loom = snakemake.input.seurat_loom

# Sample/batch identifier for this rule invocation.
sample_batch = snakemake.params.seurat_sample

# All outputs are written next to the declared output object.
out_object = snakemake.output.out_object
out_dir = os.path.dirname(out_object)
#walkthrough
#https://colab.research.google.com/github/theislab/scvelo_notebooks/blob/master/VelocityBasics.ipynb#scrollTo=iHl8jdCUd1j8

#scvelo documentation
#https://readthedocs.org/projects/scvelo/downloads/pdf/latest/

#ds = loompy.connect(seurat_loom,mode = "r") #seurat object to loom ... in r 'as.loom(seuratObj, filename = "seuratObj.loom")
adata = scv.read(velocity_loom)

#remove cell id duplicates
# obs_names_make_unique suffixes clashing barcodes with "-<n>"; any name that
# then contains "-" is a duplicate and is dropped below.
adata.obs_names_make_unique("-")

non_duplicates = [x for x in adata.obs_names if "-" not in x]

adata = adata[adata.obs_names.isin(non_duplicates)]

#make gene names unique
adata.var_names_make_unique("-")

# Work inside the output directory so figures land next to the results.
os.chdir(out_dir)
#matplotlib settings to 'upgraded' images
scv.set_figure_params('scvelo')
scv.logging.print_version()
示例#22
0
    sc.pl.umap(a, color=['tp'], save='_' + r + '_tp')
    sc.pl.umap(a, color=['ActualRegion'], save='_' + r + '_ActualRegion')
    a.obs = adata.obs.loc[a.obs.index, :]
    sc.pl.umap(a, color=['subclassname'], save='_' + r + '_Subclass')

###Now the velocity, using the already projected data

# One velocyto output directory per sequencing run.
velodirs = [
    "E65-2019A_AND_E65-2019B_MULTI-SEQ_1_Out_velocyto",
    "E65-2019A_AND_E65-2019B_MULTI-SEQ_2_Out_velocyto",
    "E65-2019A_AND_E65-2019B_MULTI-SEQ_3_Out_velocyto",
    "E80-2019_MULTI-SEQ_Out_velocyto", "E90-2019_MULTI-SEQ_Out_velocyto"
]
# Each directory is assumed to hold exactly one .loom file.
velofiles = [os.listdir(os.path.join(headpath, x))[0] for x in velodirs]
velopaths = [os.path.join(headpath, x, y) for x, y in zip(velodirs, velofiles)]
velolooms = [scv.read(x, cache=True) for x in velopaths]
# FIX: idiomatic zip iteration instead of `for i in range(len(velolooms))`.
for loom, batch in zip(velolooms, velodirs):
    # Strip the "-1" barcode suffix so names match the expression object.
    # NOTE(review): re.sub removes every "-1" occurrence, not only a suffix -
    # safe as long as barcodes contain no internal "-1".
    loom.obs.index = [re.sub("-1", "", x) for x in loom.obs.index]
    loom.obs.insert(0, 'batch', batch)
vdata = sc.AnnData.concatenate(*velolooms)
print(vdata.obs)
vdata.var_names_make_unique()
avdata = scv.utils.merge(adata, vdata)
print(avdata.obs)
print(adata.obs)

avdata.var_names_make_unique()
print('norm')
scv.pp.filter_genes(avdata)
示例#23
0
import sys  # FIX: used below (sys.argv) but was never imported
import pandas as pd  # FIX: used below but was never imported
import scanpy as sc  # FIX: used below but was never imported
import scvelo as scv  # FIX: used below but was never imported
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from matplotlib import pyplot as plt
from matplotlib import rcParams

##### Setup scVelo
sc.settings.verbosity = 3
sc.set_figure_params(dpi=80, color_map='viridis')
scv.settings.set_figure_params('scvelo')


##### Load Data
LoomFile=sys.argv[1]
OUTFILE_PREFIX=sys.argv[2]

Experiment = OUTFILE_PREFIX
adata = scv.read(LoomFile, cache=True)
adata.var_names_make_unique()

# Export the cell barcodes so annotations can be prepared externally.
cellnames = adata.obs_names
df = pd.DataFrame(adata.obs_names)
df.to_csv('Samples.txt', index=False)

anno = pd.read_csv('Annotations.txt')                ###################### Example of Annotations.txt is available on GitHub
# NOTE(review): this replaces .obs wholesale and assumes Annotations.txt rows
# are in exactly the same order as adata's cells - confirm before use.
adata.obs = anno


#### Basic Filtering
sc.pp.filter_cells(adata, min_genes=0)
sc.pp.filter_genes(adata, min_cells=0)

##### Normalization
示例#24
0
"""Run the scVelo dynamical model on the LPS scRNA velocyto dataset."""
import scvelo as scv

# Figure and logging configuration.
scv.set_figure_params()
scv.settings.verbosity = 3  # show errors(0), warnings(1), info(2), hints(3)
scv.settings.presenter_view = True  # set max width size for presenter view
scv.settings.set_figure_params('scvelo')  # for beautified visualization

# Merged spliced/unspliced dataset produced by velocyto.
adata_merged = scv.read("/rsrch3/scratch/sarc_med_onco-rsch/dtruong4/LPS_scRNA/LPS_data_cell_velocyto.h5ad")

# Pipeline stages, announced before each runs.
pipeline = (
    ('Running recover_dynamics',
     lambda: scv.tl.recover_dynamics(adata_merged, n_jobs=20)),
    ('Running velocity',
     lambda: scv.tl.velocity(adata_merged, mode='dynamical')),
    ('Running velocity_graph',
     lambda: scv.tl.velocity_graph(adata_merged, n_jobs=20)),
)
for message, stage in pipeline:
    print(message)
    stage()

print('Writing data')
adata_merged.write("/rsrch3/scratch/sarc_med_onco-rsch/dtruong4/LPS_scRNA/LPS_data_cell_velocyto_calc_dynamic.h5ad")

print('Job done')
示例#25
0
import scvelo as scv
scv.settings.set_figure_params('scvelo')
# FIX: `import scanpy.api as sc` - the scanpy.api module was removed in
# scanpy 1.5; the same functions live on the top-level scanpy module.
import scanpy as sc
sc.settings.autoshow=False
sc.settings.autosave=True
sc.settings.figdir='/scrapp2/mtschmitz/data/Exonic/fig'
# Expression matrix from cellranger plus spliced/unspliced layers from velocyto.
adata = sc.read_10x_mtx('/scrapp2/mtschmitz/data/Exonic/E40_motor_Out/outs/filtered_gene_bc_matrices/refdata-celranger-mmul8-toplevel/', cache=True)
ldata = scv.read('/scrapp2/mtschmitz/data/Exonic/E40_motor_Out_velocyto/possorted_genome_bam_RWRQ2.loom', cache=True)
adata.var_names_make_unique()
ldata.var_names_make_unique()
adata = scv.utils.merge(adata, ldata)
adata.var_names_make_unique()
# Standard scVelo preprocessing: filter, normalize, select dispersed genes, log.
print('norm')
scv.pp.filter_genes(adata)
scv.pp.normalize_per_cell(adata)
scv.pp.filter_genes_dispersion(adata)
scv.pp.log1p(adata)
print(adata)
print('moment')
scv.pp.moments(adata, n_pcs=30, n_neighbors=30)
print('velo')
scv.tl.umap(adata)
scv.tl.velocity(adata)
print('graph')
scv.tl.velocity_graph(adata)
scv.tl.velocity_embedding(adata, basis='umap')
scv.pl.velocity_embedding(adata, basis='umap',save='Embed')
scv.pl.velocity_embedding_grid(adata, basis='umap',save='Grid')
scv.pl.velocity_embedding_stream(adata, basis='umap',save='stream')
sc.tl.leiden(adata)
示例#26
0
def main():
    """Command-line entry point: convert a single-cell analysis result
    (scanpy/paga/seurat/stream/velocity) into a VR report.

    Reads the input file plus annotation (and optional gene) lists, writes
    the report into --output, and zips it for scanpy/paga/seurat.
    """
    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-f",
                        "--filename",
                        dest="filename",
                        default=None,
                        required=True,
                        help="Analysis result file name",
                        metavar="FILE")
    parser.add_argument(
        "-t",
        "--toolname",
        dest="toolname",
        default=None,
        required=True,
        type=str.lower,
        choices=['scanpy', 'paga', 'seurat', 'stream', 'velocity'],
        help="Tool used to generate the analysis result.")
    parser.add_argument(
        "-a",
        "--annotations",
        dest="annotations",
        default=None,
        required=True,
        help=
        "Annotation file name. It contains the cell annotation key(s) to visualize in one column."
    )
    parser.add_argument(
        "-g",
        "--genes",
        dest="genes",
        default=None,
        help=
        "Gene list file name. It contains the genes to visualize in one column."
    )
    parser.add_argument("-o",
                        "--output",
                        dest="output",
                        default='vr_report',
                        help="Output folder name")
    parser.add_argument(
        "--layer",
        dest="layer",
        default='norm_data',
        help="The name of layer in Anndata object for gene expression")

    args = parser.parse_args()
    filename = args.filename
    toolname = args.toolname
    genes = args.genes
    output = args.output  # work directory
    annotations = args.annotations
    layer = args.layer

    # Defensive check; unreachable in practice because -a is required=True.
    if annotations is None:
        raise Exception(
            "Annotation file must be specified when %s is chosen." %
            (toolname))

    # All tools except velocity take a one-column file of annotation keys.
    if toolname != 'velocity':
        try:
            ann_list = pd.read_csv(annotations,
                                   sep='\t',
                                   header=None,
                                   index_col=None).iloc[:, 0].tolist()
        except FileNotFoundError as fnf_error:
            print(fnf_error)
            raise
        except Exception:  # FIX: was a bare `except:`; still logs and re-raises
            print('Failed to load in annotation file.')
            raise
        else:
            ann_list = list(set(ann_list))  # de-duplicate keys

    # Optional one-column gene list.
    if genes is not None:
        try:
            gene_list = pd.read_csv(genes,
                                    sep='\t',
                                    header=None,
                                    index_col=None).iloc[:, 0].tolist()
        except FileNotFoundError as fnf_error:
            print(fnf_error)
            raise
        except Exception:  # FIX: was a bare `except:`
            print('Failed to load in gene list.')
            raise
        else:
            gene_list = list(set(gene_list))
    else:
        gene_list = None

    print("Converting '%s' analysis result ..." % toolname)

    if toolname in ['scanpy', 'paga', 'seurat']:
        if toolname == 'scanpy':
            # FIX: message previously said "For PAGA" (copy-paste from paga branch).
            assert filename.lower().endswith(
                '.h5ad'), "For Scanpy only .h5ad file is supported."
            print('reading in h5ad file ...')
            adata = ad.read_h5ad(filename)
            scvr.output_scanpy_cells(adata,
                                     ann_list,
                                     gene_list=gene_list,
                                     reportdir=output)
        if toolname == 'paga':
            assert filename.lower().endswith(
                '.h5ad'), "For PAGA only .h5ad file is supported."
            print('reading in h5ad file ...')
            adata = ad.read_h5ad(filename)
            scvr.output_paga_graph(adata, reportdir=output)
            scvr.output_paga_cells(adata,
                                   ann_list,
                                   gene_list=gene_list,
                                   reportdir=output)
        if toolname == 'seurat':
            assert filename.lower().endswith(
                ('.loom', '.h5ad')
            ), "For Seurat only .loom .h5ad file is supported."
            print('reading in loom file ...')
            if filename.lower().endswith('.loom'):
                adata = ad.read_loom(filename)
            else:
                adata = ad.read(filename)
            scvr.output_seurat_cells(adata,
                                     ann_list,
                                     gene_list=gene_list,
                                     reportdir=output)
        # Bundle the report folder into a zip and remove the directory.
        with open(os.path.join(output, 'index.json'), 'w') as f:
            json.dump({"tool": toolname}, f)
        shutil.make_archive(base_name=output, format='zip', root_dir=output)
        shutil.rmtree(output)
    if toolname == 'velocity':
        assert (filename.lower().endswith('.h5ad')
                or filename.lower().endswith('.loom')
                ), 'Velocity supports .h5ad or .loom.'
        adata = scv.read(filename)
        # For velocity, -a names a single annotation field rather than a file.
        scvr.output_velocity_cells(adata,
                                   ann_field=annotations,
                                   gene_list=gene_list,
                                   reportdir=output)
    if toolname == 'stream':
        try:
            import stream as st
        except ImportError:
            raise ImportError(
                'Please install STREAM >=0.5: `conda install -c bioconda stream`.'
            )
        assert filename.lower().endswith(
            '.pkl'), "For STREAM only .pkl file is supported."
        print('reading in pkl file ...')
        adata = st.read(filename, file_format='pkl', workdir='./')
        st.save_vr_report(adata,
                          ann_list=ann_list,
                          gene_list=gene_list,
                          file_name=output)
示例#27
0
# Build one AnnData per input file, keyed by the prefix before the first "_".
for file in file_list:
    name = re.sub("_.+", "", file)
    con_dir[name] = subset_anndata(file, csv_loc + name)

# Concatenate all samples; the source key is recorded in obs["dataset"].
concat = anndata.concat(con_dir, axis=0, label="dataset")
path = Path(
    r"C:/Users/USER/Documents/R/RNAseq/scientificProject/data/Scadden/" +
    r"Concat_raw.h5ad")
concat.write_h5ad(filename=path, )
# Free memory before re-reading the combined file.
del concat
del con_dir
del file_list

loc = r"C:/Users/USER/Documents/R/RNAseq/scientificProject/data/Scadden/Raw_based/"
os.chdir(loc)
adata = scv.read(path)
# NOTE(review): this rebuild of the column looks like it converts a
# categorical "dataset" column to plain objects - confirm that is the intent.
adata.obs.dataset = [x for x in adata.obs.dataset]
# Suffix each barcode with its dataset name to keep cell IDs unique.
new_index = []
for ob in range(len(adata.obs.index)):
    cell = adata.obs.index[ob]
    dataset = adata.obs["dataset"][ob]
    n_ind = cell + "_" + dataset
    new_index.append(n_ind)
adata.obs.index = new_index

# adding seurat data
csv_loc = r"C:/Users/USER/Documents/R/RNAseq/scientificProject/data/Scadden/R_references/"
seurat_meta = pd.read_csv(csv_loc + "seurat_meta.csv", index_col=0)

seurat_meta[["origin", "cells",
             "clusters"]] = seurat_meta[["origin", "cells",
示例#28
0
import scvelo as scv
import pandas as pd
import numpy as np
from scipy import sparse
import os

os.chdir(r"C:\Users\USER\Documents\R\RNAseq\scientificProject\data\combined")

scv.settings.set_figure_params('scvelo')

# Spliced/unspliced matrices exported as csv (genes x cells); transpose to
# AnnData's cells x genes orientation.
# NOTE(review): the unspliced read omits first_column_names=True that the
# spliced read uses - confirm both files share the same layout.
s = scv.read('sub_spliced.csv', cache=True, first_column_names=True)
s = s.transpose()
u = scv.read('sub_unspliced.csv', cache=True)
u = u.transpose()

adata = s
adata.layers['spliced'] = s.X
adata.layers['unspliced'] = u.X

# Seurat metadata; assumes rows are in the same cell order as the matrices.
m = pd.read_csv("combined_meta.csv")
adata.obs['cell_cluster'] = list(m['seurat_clusters'])
adata.obs['cell_cluster'] = adata.obs['cell_cluster'].astype('category')

# Carry over the Seurat UMAP embedding as a reference projection.
UMAP_D = m[['UMAP_1', 'UMAP_2']]
adata.obsm['X_umap_ori'] = np.asanyarray(UMAP_D)

# Free the intermediates before the heavy preprocessing step.
del u
del s
del m

scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=2000)
示例#29
0
    if False:
        print('Analyze RNA velocity')
        csts = [
            'Early Car4- capillaries',
            'Late Car4- capillaries',
        ]
        dsi = ds.query_samples_by_metadata(
                'cellSubtype in @csts',
                local_dict=locals(),
                inplace=False)

        print('Load combined velocity file')
        fn_combined = '../../data/sequencing/datasets/all_{:}/velocity_endo.loom'.format(version)
        import scvelo as scv
        adata_endo = scv.read(fn_combined, cache=True)
        adata_endo.var_names_make_unique()

        print('Restrict to subtypes')
        adata = adata_endo[dsi.samplenames]

        print('Follow tutorial')
        # show proportions of spliced/unspliced abundances
        #scv.utils.show_proportions(adata)

        scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=2000)
        scv.pp.moments(adata, n_pcs=25, n_neighbors=10)

        scv.tl.velocity(adata)

        scv.tl.velocity_graph(adata)
示例#30
0
# Notebook cell magic: load the R libraries used by later %%R cells.
get_ipython().run_cell_magic(
    'R', '',
    '# Load all the R libraries we will be using in the notebook\nlibrary(scran)\nlibrary(RColorBrewer)\nlibrary(slingshot)\nlibrary(monocle)\nlibrary(gam)\nlibrary(clusterExperiment)\nlibrary(ggplot2)\nlibrary(plyr)\nlibrary(MAST)'
)

scv.settings.set_figure_params('scvelo')

# split into 3 separate objects (one per donor/condition)
TLS = adata[adata.obs['donor'].isin(['TLS'])]
Gastruloid = adata[adata.obs['donor'].isin(['Gastruloid'])]
TLSCL = adata[adata.obs['donor'].isin(['TLSCL'])]

# Read and merge velocity
# One velocyto loom per condition, read as sparse to limit memory use.
TLS_loom_120 = scv.read("TLS_120h/velocyto/TLS_120h.loom",
                        sparse=True,
                        cache=True)
TLS_loom_120.var_names_make_unique()
Gastruloid_loom_120 = scv.read("Gastruloid/velocyto/Gastruloid.loom",
                               sparse=True,
                               cache=True)
Gastruloid_loom_120.var_names_make_unique()
TLSCL_loom_120 = scv.read("TLSCL/velocyto/TLSCL.loom", sparse=True, cache=True)
TLSCL_loom_120.var_names_make_unique()

## merge loom file into an already existing object
TLS = scv.utils.merge(TLS, TLS_loom_120)
TLS.var_names_make_unique()
TLS.obs_names_make_unique()

Gastruloid = scv.utils.merge(Gastruloid, Gastruloid_loom_120)