Пример #1
0
def read_and_qc(sample_name, path):
    r"""Load one 10X Visium sample into an AnnData object and attach QC metrics.

    The data folder is located by concatenating ``path`` and
    ``str(sample_name)`` directly (no separator is inserted). Genes are
    re-indexed by the ``gene_ids`` column while the original names are kept in
    ``var['SYMBOL']``, and spot names are prefixed with the sample name.
    Modify this function if required by your workflow.

    :param sample_name: Name of the sample
    :param path: path prefix of the data; the sample name is appended to it
    :return: the AnnData object with ``obs['sample']``, ``obs['mt_frac']``,
        ``var['SYMBOL']``, ``var['mt']`` and the scanpy QC metrics filled in
    """
    visium = sc.read_visium(
        path + str(sample_name),
        count_file='filtered_feature_bc_matrix.h5',
        load_images=True,
    )
    visium.obs['sample'] = sample_name

    # Keep the gene names as a column and index the genes by 'gene_ids'.
    visium.var['SYMBOL'] = visium.var_names
    visium.var.rename(columns={'gene_ids': 'ENSEMBL'}, inplace=True)
    visium.var_names = visium.var['ENSEMBL']
    visium.var.drop(columns='ENSEMBL', inplace=True)

    # Generic QC metrics first, then the fraction of counts per spot that
    # comes from genes whose symbol starts with 'mt-'.
    sc.pp.calculate_qc_metrics(visium, inplace=True)
    visium.var['mt'] = [
        symbol.startswith('mt-') for symbol in visium.var['SYMBOL']
    ]
    mt_selection = visium.var['mt'].tolist()
    mt_counts = visium[:, mt_selection].X.sum(1).A.squeeze()
    visium.obs['mt_frac'] = mt_counts / visium.obs['total_counts']

    # Prefix every spot name with its (stringified) sample name.
    visium.obs["sample"] = [str(i) for i in visium.obs['sample']]
    prefixed_names = visium.obs["sample"] + '_' + visium.obs_names
    visium.obs_names = prefixed_names
    visium.obs.index.name = 'spot_id'

    return visium
Пример #2
0
def test_visium_default(image_comparer):  # default values
    """Draw the Visium fixture with default ``sc.pl.spatial`` arguments.

    The figure name is then handed to the callable returned by the
    ``image_comparer`` fixture (built with ``ROOT``, ``FIGS`` and ``tol=15``).
    """
    check_figure = image_comparer(ROOT, FIGS, tol=15)

    visium_dir = HERE / '_data' / 'visium_data' / '1.0.0'
    visium = sc.read_visium(visium_dir)
    # 'array_row' is converted to strings before plotting.
    visium.obs = visium.obs.astype({'array_row': 'str'})

    sc.pl.spatial(visium, show=False)
    check_figure('master_spatial_visium_default')
Пример #3
0
def test_visium_empty_img_key(image_comparer):
    """Plot the Visium fixture without a background image, in two ways.

    First through ``sc.pl.spatial`` with ``img_key=None``, then through
    ``sc.pl.embedding`` on the ``"spatial"`` basis; each figure name is passed
    to the comparer obtained from the ``image_comparer`` fixture.
    """
    check_figure = image_comparer(ROOT, FIGS, tol=15)

    visium_dir = HERE / '_data' / 'visium_data' / '1.0.0'
    visium = sc.read_visium(visium_dir)
    # 'array_row' is converted to strings before being used as colour key.
    visium.obs = visium.obs.astype({'array_row': 'str'})

    # Variant 1: spatial plot with the image key explicitly disabled.
    sc.pl.spatial(visium, img_key=None, color="array_row")
    check_figure('master_spatial_visium_empty_image')

    # Variant 2: generic embedding plot on the spatial coordinates.
    sc.pl.embedding(visium, basis="spatial", color="array_row")
    check_figure('master_spatial_visium_embedding')
Пример #4
0
def test_visium_circles(image_comparer):
    """Plot a cropped, semi-transparent subset of groups on the Visium fixture.

    Only the ``array_row`` groups ``"24"`` and ``"33"`` are requested, with a
    crop window, ``alpha=0.5`` and ``size=1.3``; the figure name is passed to
    the comparer obtained from the ``image_comparer`` fixture.
    """
    check_figure = image_comparer(ROOT, FIGS, tol=15)

    visium_dir = HERE / '_data' / 'visium_data' / '1.0.0'
    visium = sc.read_visium(visium_dir)
    # 'array_row' is converted to strings so the groups can be named as text.
    visium.obs = visium.obs.astype({'array_row': 'str'})

    plot_options = dict(
        color="array_row",
        groups=["24", "33"],
        crop_coord=(100, 400, 400, 100),
        alpha=0.5,
        size=1.3,
    )
    sc.pl.spatial(visium, **plot_options)

    check_figure('master_spatial_visium')
Пример #5
0
def test_spatial_external_img(image_comparer):  # external image
    """Plot the Visium fixture with the image and scale factor passed explicitly.

    The hires image and its ``tissue_hires_scalef`` are taken from the
    ``"custom"`` entry of ``adata.uns["spatial"]`` and given to
    ``sc.pl.spatial`` through ``img=`` / ``scale_factor=``.
    """
    check_figure = image_comparer(ROOT, FIGS, tol=15)

    visium_dir = HERE / '_data' / 'visium_data' / '1.0.0'
    visium = sc.read_visium(visium_dir)
    visium.obs = visium.obs.astype({'array_row': 'str'})

    # Pull the image and its matching scale factor out of the library entry.
    library = visium.uns["spatial"]["custom"]
    hires_image = library["images"]["hires"]
    hires_scale = library["scalefactors"]["tissue_hires_scalef"]

    sc.pl.spatial(
        visium,
        color="array_row",
        scale_factor=hires_scale,
        img=hires_image,
        basis="spatial",
        show=False,
    )
    check_figure('master_spatial_external_img')
Пример #6
0
def test_spatial_general(image_comparer):  # general coordinates
    """Plot spatial coordinates after removing the ``"spatial"`` entry from ``uns``.

    The spot size is taken from the removed metadata and passed explicitly.
    Three figures are produced: no colour, ``array_row`` (cast to ``str``)
    and ``array_col``.
    """
    check_figure = image_comparer(ROOT, FIGS, tol=15)

    visium_dir = HERE / '_data' / 'visium_data' / '1.0.0'
    visium = sc.read_visium(visium_dir)
    visium.obs = visium.obs.astype({'array_row': 'str'})

    # spatial data don't have imgs, so remove entry from uns
    removed_metadata = visium.uns.pop("spatial")

    # Required argument for now: use the spot diameter of the first library.
    first_library = list(removed_metadata.values())[0]
    spot_size = first_library["scalefactors"]["spot_diameter_fullres"]

    # (colour key, figure name); None means "no colour argument at all".
    sc.pl.spatial(visium, show=False, spot_size=spot_size)
    check_figure('master_spatial_general_nocol')

    figures = (
        ("array_row", 'master_spatial_general_cat'),   # category
        ("array_col", 'master_spatial_general_cont'),  # continuous
    )
    for colour_key, figure_name in figures:
        sc.pl.spatial(visium, show=False, spot_size=spot_size, color=colour_key)
        check_figure(figure_name)
Пример #7
0
def preprocess_spdata_single(data_folder, sample_name):
    """Read a single Visium sample and annotate it with QC information.

    The sample is read from ``os.path.join(data_folder, sample_name)``.
    Genes end up indexed by the ``gene_ids`` column (original names are kept
    in ``var["SYMBOL"]``), spots get a ``mt_frac`` column and their names are
    prefixed with ``"<sample>_"``.

    :param data_folder: directory that contains one sub-folder per sample
    :param sample_name: name of the sample sub-folder; also stored in
        ``obs["sample"]``
    :return: the annotated AnnData object
    """
    sample_dir = os.path.join(data_folder, sample_name)
    spdata = sc.read_visium(
        sample_dir,
        count_file="filtered_feature_bc_matrix.h5",
        load_images=True,
    )
    spdata.obs["sample"] = sample_name

    # Swap the gene index: names go to a SYMBOL column, 'gene_ids' become
    # the index.
    spdata.var["SYMBOL"] = spdata.var_names
    spdata.var.rename(columns={"gene_ids": "ENSEMBL"}, inplace=True)
    spdata.var_names = spdata.var["ENSEMBL"]
    spdata.var.drop(columns="ENSEMBL", inplace=True)

    # QC metrics, plus the per-spot share of counts from 'mt-' genes.
    sc.pp.calculate_qc_metrics(spdata, inplace=True)
    spdata.var["mt"] = [
        symbol.startswith("mt-") for symbol in spdata.var["SYMBOL"]
    ]
    mt_selection = spdata.var["mt"].tolist()
    mt_counts = spdata[:, mt_selection].X.sum(1).A.squeeze()
    spdata.obs["mt_frac"] = mt_counts / spdata.obs["total_counts"]

    # Make spot names unique across samples by prefixing the sample name.
    spdata.obs["sample"] = [str(i) for i in spdata.obs["sample"]]
    spdata.obs_names = spdata.obs["sample"] + "_" + spdata.obs_names
    spdata.obs.index.name = "spot_id"

    return spdata
Пример #8
0
def main():
    """Command-line entry point: map reference cell types onto Visium slides.

    Positional arguments: the spatial data directory (expected to contain the
    sub-folders ``JBO01`` ... ``JBO04``), the result directory and the CUDA
    device index (``"0"``-``"7"``). Options select the annotation column, an
    explicit regression-model directory and which slide(s) to process.

    Steps performed:

    1. validate the arguments and check that each requested slide folder
       contains ``filtered_feature_bc_matrix.h5``;
    2. configure ``CUDA_VISIBLE_DEVICES``, ``CPATH`` and ``THEANO_FLAGS``
       *before* importing cell2location;
    3. read one slide, or read and concatenate all four slides;
    4. filter spots (11000-50000 counts, < 20 % ``pct_counts_mt``), filter
       genes (``min_cells=10``) and move the ``mt-`` genes into
       ``obsm['mt']``;
    5. load ``sc.h5ad`` from the regression-model directory and build the
       cell-type signature table;
    6. run ``cell2location.run_cell2location`` with results written to
       ``<result_dir>/std_model/``.

    All paths are assembled with ``os.path.join`` so that ``result_dir`` and
    ``--regression_model_path`` work with or without a trailing separator.
    """
    prs = arp.ArgumentParser()

    prs.add_argument('sp_data_path', type=str, help='path to spatial data')

    prs.add_argument('result_dir',
                     type=str,
                     help='directory to regression model and results')

    prs.add_argument('cuda_device',
                     type=str,
                     help="index of cuda device ID, from 0-7")

    prs.add_argument('-a',
                     '--annotation_column',
                     default='celltype',
                     type=str,
                     help='column name for covariate')

    prs.add_argument('-r',
                     '--regression_model_path',
                     default=None,
                     type=str,
                     help='path to regression model')

    prs.add_argument('-s',
                     '--slide',
                     default="1",
                     type=str,
                     help='select slide 1-4, or all')

    args = prs.parse_args()

    cuda_device = args.cuda_device
    sp_data_path = args.sp_data_path
    results_folder = args.result_dir
    covariate_col_names = args.annotation_column
    slide = args.slide

    if args.regression_model_path is None:
        # Default: first entry found in <result_dir>/regression_model.
        regression_root = os.path.join(results_folder, "regression_model")
        regression_model_output = os.listdir(regression_root)[0]
        reg_path = os.path.join(regression_root, regression_model_output)
    else:
        reg_path = args.regression_model_path

    assert cuda_device in ["0", "1", "2", "3", "4", "5", "6",
                           "7"], "invalid device id"
    assert slide in ["1", "2", "3", "4"
                     ] or slide == "all", "slide does not exist"
    if slide.isdigit():
        assert 'filtered_feature_bc_matrix.h5' in os.listdir(
            os.path.join(sp_data_path, "JBO0" + slide)
        ), "file path does not contain h5 feature matrix"
    else:
        assert all(
            'filtered_feature_bc_matrix.h5' in os.listdir(
                os.path.join(sp_data_path, "JBO0" + str(i)))
            for i in range(1, 5)
        ), "one or more file path does not contain h5 feature matrix"

    ##### MAIN PART #####
    os.environ["CUDA_VISIBLE_DEVICES"] = cuda_device
    # To use cuDNN. os.environ performs no shell expansion, so the previous
    # value has to be appended explicitly (a literal "$CPATH" would otherwise
    # end up in the search path).
    cuda_include = "/usr/local/cuda/include"
    previous_cpath = os.environ.get("CPATH")
    os.environ["CPATH"] = (cuda_include + os.pathsep + previous_cpath
                           if previous_cpath else cuda_include)

    # The heavy imports are deferred until the environment is configured.
    import sys
    import scanpy as sc
    import anndata
    import pandas as pd
    import numpy as np

    data_type = 'float32'

    # this line forces theano to use the GPU and should go before importing cell2location
    os.environ[
        "THEANO_FLAGS"] = 'device=cuda,floatX=' + data_type + ',force_device=True'

    import cell2location
    import matplotlib as mpl
    from matplotlib import rcParams
    import matplotlib.pyplot as plt
    import seaborn as sns
    mpl.use('Agg')

    # silence scanpy that prints a lot of warnings
    import warnings
    warnings.filterwarnings('ignore')

    # Trailing "" keeps a trailing separator on the export directory, as the
    # original string-concatenated path had.
    std_model_dir = os.path.join(results_folder, "std_model", "")
    os.makedirs(std_model_dir, exist_ok=True)

    ## READ IN SPATIAL DATA ##
    if slide == "all":
        # We will merge all slides together in one adata object
        adata_list, sample_name = [], []
        for i in range(1, 5):
            name = 'JBO0' + str(i)
            slide_dir = os.path.join(sp_data_path, name)
            temp_adata = sc.read_visium(slide_dir)
            print("Read in file from " + slide_dir)
            temp_adata.var_names_make_unique()
            temp_adata.var["mt"] = temp_adata.var_names.str.startswith("mt-")
            sc.pp.calculate_qc_metrics(temp_adata,
                                       qc_vars=["mt"],
                                       inplace=True)
            temp_adata.obs['sample'] = name
            sample_name.append(name)
            adata_list.append(temp_adata)

        adata = adata_list[0].concatenate(adata_list[1:],
                                          batch_key="sample",
                                          uns_merge="unique",
                                          batch_categories=sample_name,
                                          index_unique=None)
    else:
        adata = sc.read_visium(os.path.join(sp_data_path, "JBO0" + slide))
        print("Read in file from " + sp_data_path)
        adata.var_names_make_unique()
        adata.obs['sample'] = "JBO0" + slide
        adata.var['mt'] = adata.var_names.str.startswith("mt-")
        sc.pp.calculate_qc_metrics(adata, qc_vars=["mt"], inplace=True)

    adata.obs_names_make_unique()
    # Calculate QC metrics and filter
    print("Before filtering: {} spots and {} genes".format(*adata.shape))
    adata.var['SYMBOL'] = adata.var_names
    sc.pp.filter_cells(adata, min_counts=11000)
    sc.pp.filter_cells(adata, max_counts=50000)
    adata = adata[adata.obs["pct_counts_mt"] < 20]
    sc.pp.filter_genes(adata, min_cells=10)

    # mitochondria-encoded (MT) genes should be removed for spatial mapping;
    # their counts are kept per spot in obsm['mt'].
    adata.obsm['mt'] = adata[:, adata.var['mt'].values].X.toarray()
    adata = adata[:, ~adata.var['mt'].values]
    print("After filtering: {} spots and {} genes".format(*adata.shape))

    adata_vis = adata.copy()
    adata_vis.raw = adata_vis

    ## READ IN REFERENCE DATA
    adata_raw = sc.read(os.path.join(reg_path, 'sc.h5ad'))

    # Export cell type expression signatures: one
    # 'means_cov_effect_<column>_<cell type>' column per cell type, renamed
    # to the bare cell-type label. The labels are taken directly from the
    # annotation column (no regex substitution, so labels containing regex
    # metacharacters are handled correctly).
    cell_types = adata_raw.obs[covariate_col_names].unique()
    inf_aver = adata_raw.raw.var.copy()
    inf_aver.index = adata_raw.raw.var['SYMBOL']
    inf_aver = inf_aver.loc[:, [
        f'means_cov_effect_{covariate_col_names}_{i}' for i in cell_types
    ]]
    inf_aver.columns = [str(i) for i in cell_types]
    inf_aver = inf_aver.iloc[:, inf_aver.columns.argsort()]

    # scale up by average sample scaling factor
    inf_aver = inf_aver * adata_raw.uns['regression_mod']['post_sample_means'][
        'sample_scaling'].mean()

    ## RUN CELL2LOCATION ##
    cell2location.run_cell2location(

        # Single cell reference signatures as pd.DataFrame
        # (could also be data as anndata object for estimating signatures analytically - `sc_data=adata_snrna_raw`)
        sc_data=inf_aver,
        # Spatial data as anndata object
        sp_data=adata_vis,
        verbose=True,
        # the column in sc_data.obs that gives cluster identity of each cell
        summ_sc_data_args={'cluster_col': covariate_col_names},
        train_args={
            'use_raw':
            True,  # By default uses raw slots in both of the input datasets.
            'n_iter':
            15000,  # Increase the number of iterations if needed (see below)

            # When analysing data that contains multiple samples,
            # cell2location will select a model version which pools information across samples
            # For details see https://cell2location.readthedocs.io/en/latest/cell2location.models.html#module-cell2location.models.CoLocationModelNB4E6V2
            'sample_name_col': 'sample'
        },  # Column in sp_data.obs with Sample ID

        # Number of posterior samples to use for estimating parameters,
        # reduce if not enough GPU memory
        posterior_args={'n_samples': 1000},
        export_args={
            'path':
            std_model_dir,  # path where to save results
            'run_name_suffix': ''  # optional suffix to modify the name of the run
        },
        model_kwargs=
        {  # Prior on the number of cells, cell types and co-located combinations
            'cell_number_prior': {
                # Use visual inspection of the tissue image to determine
                # the average number of cells per spot,
                # an approximate count is good enough:
                'cells_per_spot': 8,
                # Prior on the number of cell types (or factors) in each spot
                'factors_per_spot': 7,
                # Prior on the number of correlated cell type combinations in each spot
                'combs_per_spot': 2.5
            },

            # Prior on change in sensitivity between technologies
            'gene_level_prior': {
                # Prior on average change in expression level from scRNA-seq to spatial technology,
                # this reflects your belief about the sensitivity of the technology in your experiment
                'mean': 1 / 2,
                # Prior on how much individual genes differ from that average,
                # a good choice of this value should be lower than the mean
                'sd': 1 / 4
            }
        })
Пример #9
0
# Fix the seed and select the Tk backend before anything else runs.
seed(2021)
matplotlib.use('TkAgg')

# ---- file locations (all rooted at base_path) ----
base_path = '/Users/zhongyuanke/data/'
anterior_out_path = 'dann_vae/spatial/rna_anterior_davae_01.h5ad'
posterior_out_path = 'dann_vae/spatial/rna_posterior_davae_01.h5ad'
file_rna = f'{base_path}spatial/mouse_brain/adata_processed_sc.h5ad'
rna_anterior_orig = f'{base_path}dann_vae/spatial/rna_anterior_orig.h5ad'

# Visium folders, and the count matrix stored inside each of them.
file1_spatial = f'{base_path}spatial/mouse_brain/10x_mouse_brain_Anterior/'
file2_spatial = f'{base_path}spatial/mouse_brain/10x_mouse_brain_Posterior/'
file1 = f'{file1_spatial}V1_Mouse_Brain_Sagittal_Anterior_filtered_feature_bc_matrix.h5'
file2 = f'{file2_spatial}V1_Mouse_Brain_Sagittal_Posterior_filtered_feature_bc_matrix.h5'
figure_umap = f'{base_path}dann_vae/spatial/umap.png'

# ---- load both Visium sections and the scRNA-seq reference ----
adata_spatial_anterior = sc.read_visium(file1_spatial, count_file=file1)
adata_spatial_posterior = sc.read_visium(file2_spatial, count_file=file2)
adata_spatial_anterior.var_names_make_unique()
adata_spatial_posterior.var_names_make_unique()
adata_rna = sc.read_h5ad(file_rna)

# Disabled preprocessing that wrote cortex_for_seurat.h5ad:
# sc.pp.filter_genes(adata_rna, min_cells=500)
# sc.pp.highly_variable_genes(adata_rna, n_top_genes=5000)
# features = adata_rna.var_names[adata_rna.var['highly_variable']]
# adata_rna = adata_rna[:,features]
# adata_rna.write_h5ad(base_path+'spatial/mouse_brain/cortex_for_seurat.h5ad')

# Show a summary of each loaded object.
print(adata_rna)
print(adata_spatial_anterior)
print(adata_spatial_posterior)

# Keep only anterior spots whose second spatial coordinate is below 6000.
anterior_coords = adata_spatial_anterior.obsm["spatial"]
adata_spatial_anterior = adata_spatial_anterior[anterior_coords[:, 1] < 6000, :]
adata_spatial_posterior = adata_spatial_posterior[
Пример #10
0
def test_read_visium_counts():
    """Check that ``read_visium`` gives equal results with and without ``genome``.

    The fixture is read once with ``genome='GRCh38'`` and once with no genome
    given; both AnnData objects must compare equal.
    """
    fixture_dir = ROOT / '../visium_data/1.0.0'

    with_genome = sc.read_visium(fixture_dir, genome='GRCh38')
    without_genome = sc.read_visium(fixture_dir)

    assert_anndata_equal(with_genome, without_genome)
Пример #11
0
def Read10X(
    path: Union[str, Path],
    genome: Optional[str] = None,
    count_file: str = "filtered_feature_bc_matrix.h5",
    library_id: Optional[str] = None,
    load_images: Optional[bool] = True,
    quality: _QUALITY = "hires",
    image_path: Optional[Union[str, Path]] = None,
) -> AnnData:
    """\
    Read Visium data from 10X (wraps `read_visium` from scanpy).

    In addition to reading regular 10x output,
    this looks for the `spatial` folder and loads images,
    coordinates and scale factors.
    Based on the `Space Ranger output docs`_.

    .. _Space Ranger output docs: https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/output/overview

    Parameters
    ----------
    path
        Path to directory for visium datafiles.
    genome
        Filter expression to genes within this genome.
    count_file
        Which file in the passed directory to use as the count file. Typically would be one of:
        'filtered_feature_bc_matrix.h5' or 'raw_feature_bc_matrix.h5'.
    library_id
        Identifier for the visium library. Can be modified when concatenating multiple adata objects.
        When omitted, the first key of `adata.uns['spatial']` is used.
    load_images
        Whether to load the images (forwarded to `read_visium`).
    quality
        Image resolution the pixel coordinates are expressed in. For `'fulres'`
        the spot coordinates are used unscaled and the image is read from
        `image_path`; otherwise they are multiplied by the
        `tissue_<quality>_scalef` scale factor. The result is stored in
        `adata.obs['imagecol']` and `adata.obs['imagerow']`.
    image_path
        Path to the full resolution image. Required when `quality='fulres'`,
        ignored otherwise.

    Returns
    -------
    Annotated data matrix, where observations/cells are named by their
    barcode and variables/genes by gene name. Stores the following information:
    :attr:`~anndata.AnnData.X`
        The data matrix is stored
    :attr:`~anndata.AnnData.obs_names`
        Cell names
    :attr:`~anndata.AnnData.var_names`
        Gene names
    :attr:`~anndata.AnnData.var`\\ `['gene_ids']`
        Gene IDs
    :attr:`~anndata.AnnData.var`\\ `['feature_types']`
        Feature types
    :attr:`~anndata.AnnData.obs`\\ `['imagecol']`, `['imagerow']`
        Spot coordinates in pixels of the image selected by `quality`
    :attr:`~anndata.AnnData.uns`\\ `['spatial']`
        Dict of spaceranger output files with 'library_id' as key
    :attr:`~anndata.AnnData.uns`\\ `['spatial'][library_id]['images']`
        Dict of images (`'fulres'`, `'hires'` and `'lowres'`)
    :attr:`~anndata.AnnData.uns`\\ `['spatial'][library_id]['scalefactors']`
        Scale factors for the spots
    :attr:`~anndata.AnnData.uns`\\ `['spatial'][library_id]['metadata']`
        Files metadata: 'chemistry_description', 'software_version'
    :attr:`~anndata.AnnData.uns`\\ `['spatial'][library_id]['use_quality']`
        The `quality` value that was used
    :attr:`~anndata.AnnData.obsm`\\ `['spatial']`
        Spatial spot coordinates, usable as `basis` by :func:`~scanpy.pl.embedding`.

    Raises
    ------
    ValueError
        If `quality='fulres'` is requested without an `image_path`.
    """

    from scanpy import read_visium

    # Fail early and clearly instead of crashing inside the image reader
    # after the (potentially slow) count matrix has already been loaded.
    if quality == "fulres" and image_path is None:
        raise ValueError(
            "image_path must be provided when quality='fulres'")

    adata = read_visium(
        path,
        genome=genome,
        count_file=count_file,
        library_id=library_id,
        load_images=load_images,
    )
    adata.var_names_make_unique()

    if library_id is None:
        library_id = list(adata.uns["spatial"].keys())[0]

    # NOTE(review): everything below reads adata.obsm["spatial"] and the
    # library's images/scalefactors; presumably these are only populated when
    # load_images is true - confirm before calling with load_images=False.
    library = adata.uns["spatial"][library_id]

    if quality == "fulres":
        # Full resolution: spot coordinates are used as they are and the
        # image comes from the user-supplied file.
        image_coor = adata.obsm["spatial"]
        img = plt.imread(image_path, 0)
        library["images"]["fulres"] = img
    else:
        scale = library["scalefactors"]["tissue_" + quality + "_scalef"]
        image_coor = adata.obsm["spatial"] * scale

    adata.obs["imagecol"] = image_coor[:, 0]
    adata.obs["imagerow"] = image_coor[:, 1]
    library["use_quality"] = quality

    return adata
Пример #12
0
def read_each(i):
    """Read one Visium dataset and mirror its second spatial coordinate.

    :param i: location passed straight to ``sc.read_visium``
    :return: the AnnData object with unique ``var_names`` and the sign of
        ``obsm['spatial'][:, 1]`` inverted
    """
    visium = sc.read_visium(i)
    visium.var_names_make_unique()

    # flip Y axis to show correctly in cellxgene VIP
    flipped_y = -visium.obsm['spatial'][:, 1]
    visium.obsm['spatial'][:, 1] = flipped_y

    return visium
Пример #13
0
    adata_spatial.obs['celltype'] = pred_type
    # adata_davae.obs['cell type'] = all_type
    adata_spatial.write_h5ad(base_path + 'dann_vae/spatial/'+type+'_label_02.h5ad')


# ---- file locations (all rooted at base_path) ----
base_path = '/Users/zhongyuanke/data/'
file1_spatial = f'{base_path}spatial/mouse_brain/10x_mouse_brain_Anterior/'
file2_spatial = f'{base_path}spatial/mouse_brain/10x_mouse_brain_Posterior/'
# Count matrices live inside the corresponding Visium folders.
file1 = f'{file1_spatial}V1_Mouse_Brain_Sagittal_Anterior_filtered_feature_bc_matrix.h5'
file2 = f'{file2_spatial}V1_Mouse_Brain_Sagittal_Posterior_filtered_feature_bc_matrix.h5'
rna_path = f'{base_path}spatial/mouse_brain/adata_processed_sc.h5ad'

# ---- load both Visium sections and the scRNA-seq reference ----
adata1 = sc.read_visium(file1_spatial, count_file=file1)
adata2 = sc.read_visium(file2_spatial, count_file=file2)
adata_rna = sc.read_h5ad(rna_path)
adata1.var_names_make_unique()
adata2.var_names_make_unique()

# ---- crop each section by its spatial coordinates ----
# Section 1: second coordinate below 6000.
coords1 = adata1.obsm["spatial"]
adata1 = adata1[coords1[:, 1] < 6000, :]

# Section 2: second coordinate below 4000 and first coordinate below 6000.
coords2 = adata2.obsm["spatial"]
keep2 = (coords2[:, 1] < 4000) & (coords2[:, 0] < 6000)
adata2 = adata2[keep2, :]

# Transfer labels from the reference onto the cropped second section.
deep_label_transfer(adata2, adata_rna, type='posterior')