def main(save_folder, adata):
    """Highlight cytokine and responder spots within the epidermis (Paper Figure 4D).

    Parameters
    ----------
    save_folder : str
        Forwarded to the plotting routine.
    adata : annData
        Object whose ``obs`` table is filtered and relabelled here.

    Returns
    -------
    None
    """
    tissue_layers = ['upper EPIDERMIS', 'middle EPIDERMIS', 'basal EPIDERMIS']
    img_key = 'hires'

    # 1. Get cytokines and responders (only the third return value is used)
    _, _, cytokine_responders = gene_lists.get_publication_cyto_resps()

    # 2. Add meta data like which samples belong to which donor (optional)
    if "patient" not in adata.obs_keys():
        adata, tissue_cell_labels, _, _ = ctools.add_metadata(adata)
        # 2.1 Remove spots having no tissue/cell labels (since 06.10.2020).
        # `tissue_cell_labels` is only bound in this branch, so the filter
        # has to live here to avoid a NameError when "patient" already exists.
        has_label = adata.obs[tissue_cell_labels].to_numpy().any(axis=1)
        adata = adata[np.where(has_label)[0]]

    # 3. Use only tissue types of interest
    # 3.1 Add tissue types
    adata = ctools.add_tissue_obs(adata)
    # 3.2 Keep spots that carry exactly one of the epidermis layer labels
    layer_flags = adata.obs[tissue_layers] == 1
    adata = adata[layer_flags.sum(axis=1) == 1]

    # Rename tissue region 'INTERFACE' to basal EPIDERMIS because some spots
    # got both labels. A single `.loc` assignment is used instead of chained
    # indexing, which may write to a temporary and leave `obs` unchanged.
    m_interface = adata.obs['tissue_type'] == 'INTERFACE'
    adata.obs.loc[m_interface, 'tissue_type'] = 'basal EPIDERMIS'

    # Paper Figure 4D: Highlight cytokine and responder genes containing spots
    # and UMI-counts in the EPIDERMIS
    convert_categories_cytokines_responders_others(
        adata,
        cyto_responder_genes=cytokine_responders,
        save_folder=save_folder,
        img_key=img_key)
def main(save_folder, spatial_adata):
    """Plot IL17A across tissue layers of the whole ST data set (Figure 3A).

    Parameters
    ----------
    save_folder : str
        Forwarded to the plotting routine.
    spatial_adata : annData
        Spatial data; spots whose tissue label is 'Unknown' are dropped.

    Returns
    -------
    None
    """
    tissue_obs = 'tissue_type'

    # Only the cytokine list is needed from the publication gene lists
    cytokines, _, _ = gene_lists.get_publication_cyto_resps()

    # Discard every spot that has no tissue label
    has_tissue_label = spatial_adata.obs[tissue_obs] != 'Unknown'
    spatial_adata = spatial_adata[has_tissue_label]

    # Derive observables for the cytokine genes
    # (presumably adds the 'cytokine_<gene>' columns used below - TODO confirm)
    spatial_adata, _ = add_observables.convert_variable_to_observable(
        adata=spatial_adata,
        gene_names=cytokines,
        task='cell_gene',
        label='celltype',
        condition=None)

    # Highlight tissue layers together with IL17A
    plot_tissuerlayers_cyto(
        adata=spatial_adata,
        obs_name='cytokine_IL17A',
        title='Wholedataset_IL17A',
        save_folder=save_folder,
        regions=tissue_obs)
def main(save_folder, spatial_adata):
    """Subset ST data to leukocyte spots, plot IL13/IFNG and run the DGE readout.

    Used for Figure 3A/E and Suppl. Figures 3.

    Parameters
    ----------
    save_folder : str
        Forwarded to the plotting and readout routines.
    spatial_adata : annData
        Spatial data; spots whose tissue label is 'Unknown' are dropped.

    Returns
    -------
    None
    """
    tissue_obs = 'tissue_type'
    observable_kwargs = dict(task='cell_gene', label='celltype', condition=None)

    # Gene lists
    cytokines, _, _ = gene_lists.get_publication_cyto_resps()
    leukocyte_markers = gene_lists.leukocyte_markers()

    # Discard every spot that has no tissue label
    spatial_adata = spatial_adata[spatial_adata.obs[tissue_obs] != 'Unknown']

    # 1. Derive observables, first for the cytokines, then for the markers
    for gene_set in (cytokines, leukocyte_markers):
        spatial_adata, _ = add_observables.convert_variable_to_observable(
            adata=spatial_adata, gene_names=gene_set, **observable_kwargs)

    # 2. Read out counts and metaData for DGE Analysis
    # 2.1 Keep only the spots selected by the leukocyte marker genes
    adata_leukocytes = get_celltypes_data(spatial_adata,
                                          genes=leukocyte_markers)
    # 2.2 Merge epidermis layers into epidermis and dermis depths into dermis
    adata_leukocytes = get_tissueregions(adata=adata_leukocytes,
                                         tissue_label=tissue_obs)

    # 3. Highlight tissue regions together with each single cytokine
    for cytokine in ('IL13', 'IFNG'):
        plot_tissueregions_cyto(adata=adata_leukocytes,
                                obs_name='cytokine_' + cytokine,
                                title='Leukocytes_' + cytokine,
                                save_folder=save_folder)

    # 4. Read out all leukocyte positive spots
    include_cytokine_dp(adata=adata_leukocytes,
                        cytokines=cytokines,
                        save_folder=save_folder,
                        label=tissue_obs,
                        key='ST',
                        paper_figure='3AC_Leukocytes')
def main(save_folder, adata):
    """Read out leukocyte cells of the scRNA-seq data and plot the IL17A label.

    Parameters
    ----------
    save_folder : str
        Forwarded to the readout and plotting routines.
    adata : annData
        Single cell RNA-seq data.

    Returns
    -------
    None
    """
    cluster_obs = 'cluster_labels'

    # 1. Gene lists
    cytokines, _, _ = gene_lists.get_publication_cyto_resps()
    leukocyte_markers = gene_lists.leukocyte_markers()

    # 2. Derive observables for the cytokine genes
    adata, _ = add_observables.convert_variable_to_observable(
        adata=adata,
        gene_names=cytokines,
        task='cell_gene',
        label='celltype',
        condition=None)

    # 3. Only leukocytes: counts and metaData for the DGE Analysis
    # 3.1 Keep only the cells selected by the leukocyte marker genes
    adata_leukocytes = get_celltypes_data(adata, genes=leukocyte_markers)

    # 3.2 Read out all leukocyte cells
    include_cytokine_dp(adata=adata_leukocytes,
                        cytokines=cytokines,
                        save_folder=save_folder,
                        label=cluster_obs,
                        key='SC_merged',
                        paper_figure='SC')

    # 3.3 Add IL17A label to adata
    adata_leukocytes = add_observables.add_columns_genes(
        adata=adata_leukocytes, genes='IL17A', label='IL17A')

    # Figure 3D: Highlight IL-17A
    plot_annotated_cells(adata=adata_leukocytes,
                         color='IL17A_label',
                         paper_figure='D',
                         save_folder=save_folder,
                         key='SC',
                         title='Leukocytes_IL17A',
                         xpos=0.02,
                         ypos=0.95)
def main(save_folder, adata):
    """Read out leukocyte cells of the scRNA-seq data and plot cytokine labels.

    Two plots are requested: one coloured by 'cytokines_others' and one
    coloured by the IFNG label (Suppl. Figure 4B).

    Parameters
    ----------
    save_folder : str
        Forwarded to the readout and plotting routines.
    adata : annData
        Single cell RNA-seq data.

    Returns
    -------
    None
    """
    cluster_obs = 'cluster_labels'
    cytokine_palette = ["#ff7f00", "#377eb8", 'purple']

    # 1. Gene lists
    cytokines, _, cytoresps_dict = gene_lists.get_publication_cyto_resps()
    leukocyte_markers = gene_lists.leukocyte_markers()

    # 2. Derive observables for the cytokine genes
    adata, _ = add_observables.convert_variable_to_observable(
        adata=adata,
        gene_names=cytokines,
        task='cell_gene',
        label='celltype',
        condition=None)

    # 3. Only leukocytes: counts and metaData for the DGE Analysis
    # 3.1 Keep only the cells selected by the leukocyte marker genes
    adata_leukocytes = get_celltypes_data(adata, genes=leukocyte_markers)

    # 3.2 Read out all leukocyte cells
    include_cytokine_dp(adata=adata_leukocytes,
                        cytokines=cytokines,
                        save_folder=save_folder,
                        label=cluster_obs,
                        key='SC_merged',
                        paper_figure='SC')

    # 3.3 Read out all leukocyte cells but exclude double positive cytokine cells
    adata_leukocytes, _ = exclude_cytokine_dp(adata=adata_leukocytes,
                                              cytoresps_dict=cytoresps_dict)

    # Plot cytokines and highlight double positive
    plot_annotated_cells(adata=adata_leukocytes,
                         color='cytokines_others',
                         paper_figure='',
                         save_folder=save_folder,
                         key='SC',
                         title="Leukocytes_IL17A_IFNG",
                         xpos=0.02,
                         ypos=0.95,
                         palette=cytokine_palette)

    # Add the IFNG label to adata
    adata_leukocytes = add_observables.add_columns_genes(
        adata=adata_leukocytes, genes='IFNG', label='IFNG')

    # Suppl. Figure 4B: Highlight IFN-g
    plot_annotated_cells(adata=adata_leukocytes,
                         color='IFNG_label',
                         paper_figure='4B',
                         save_folder=save_folder,
                         key='SC',
                         title="Leukocyte_IFNG",
                         xpos=0.02,
                         ypos=0.95)
def main(save_folder, adata):
    """Highlight cytokine and responder spots and UMI-counts (Paper Figure 4B).

    Parameters
    ----------
    save_folder : str
        Forwarded to the plotting routine.
    adata : annData
        Object whose ``obs`` table is read and, if needed, extended with
        meta data.

    Returns
    -------
    None
    """
    img_key = 'hires'

    # 1. Get cytokines and responders (only the third return value is used)
    _, _, cytokine_responders = gene_lists.get_publication_cyto_resps()

    # 2. Add meta data like which samples belong to which donor (optional)
    if "patient" not in adata.obs_keys():
        adata, tissue_cell_labels, _, _ = ctools.add_metadata(adata)
        # 2.1 Remove spots having no tissue/cell labels (since 06.10.2020).
        # `tissue_cell_labels` is only bound in this branch, so the filter
        # has to live here to avoid a NameError when "patient" already exists.
        has_label = adata.obs[tissue_cell_labels].to_numpy().any(axis=1)
        adata = adata[np.where(has_label)[0]]

    # Paper Figure 4B: Highlight cytokine and responder genes containing spots
    # and UMI-counts
    convert_categories_cytokines_responders_others(
        adata,
        cyto_responder_genes=cytokine_responders,
        save_folder=save_folder,
        img_key=img_key)
def main(save_folder, pp_adata, cluster_algorithm):
    """Cluster the pre-processed scRNA-seq data, annotate clusters and plot them.

    The clustering and the annotation both use resolution r = 0.1.

    Parameters
    ----------
    save_folder : str
        Forwarded to the plotting routine.
    pp_adata : annData
        Pre-processed count matrix.
    cluster_algorithm : str
        Forwarded to the clustering and annotation routines.

    Returns
    -------
    None
    """
    resolution = 0.1

    # 1. Gene list: only the cytokines are needed
    cytokines, _, _ = gene_lists.get_publication_cyto_resps()

    # 2. Derive observables for the cytokine genes
    pp_adata, _ = add_observables.convert_variable_to_observable(
        adata=pp_adata,
        gene_names=cytokines,
        task='cell_gene',
        label='celltype',
        condition=None)

    # 3. Run the cluster algorithm; the returned key is not needed here
    pp_adata, _ = apply_clusteralgo(adata=pp_adata,
                                    algorithm=cluster_algorithm,
                                    resolution=resolution)

    # 4. Annotate clusters with expert opinion at the same resolution
    pp_adata = annotate_cluster(adata=pp_adata,
                                cluster_algorithm=cluster_algorithm,
                                resolution=resolution)

    # 5. Plot UMAP of the scRNA-seq data
    visualise_clusters(adata=pp_adata,
                       save_folder=save_folder,
                       key='cluster_labels',
                       title="SC")
def main(adata, save_folder, tissue_types, radii, get_plots=False):
    """Run the spatial correlation for one or several radii and evaluate them.

    Parameters
    ----------
    adata : annData
    save_folder : str
    tissue_types : str, list
    radii : int, list
        A list is processed element by element; any other value is treated
        as a single radius.
    get_plots : bool
        Forwarded to ``run_spatialcorrelation``.

    Returns
    -------
    None
    """
    # 1. Get cytokines and responders
    conditional_genes, _, conditionalgenes_responders = (
        gene_lists.get_publication_cyto_resps())

    # 2. Prepare adata object
    adata = data_preparation(
        adata=adata,
        tissue_types=tissue_types,
        conditional_genes=conditional_genes,
        conditionalgenes_responders=conditionalgenes_responders)

    # 3. Run conditional clustering and calculate (spatially weighted)
    #    correlation; a scalar radius is handled as a one-element list
    radius_values = radii if isinstance(radii, list) else [radii]
    sig = []
    for radius in radius_values:
        sig = run_spatialcorrelation(
            adata=adata,
            tissue_types=tissue_types,
            cytokine_responders=conditionalgenes_responders,
            save_folder=save_folder,
            radius=radius,
            sig=sig,
            get_plots=get_plots)

    # 4. Evaluate distance via elbow plot (needs more than one entry)
    if len(sig) > 1:
        plot_evaluations.plot_evaluate_distance(significance=sig,
                                                cytokines=conditional_genes,
                                                save_folder=save_folder)
def main(dataset_type, save_folder, df_keys, log, dge_results_folder):
    """Create volcano and violin plots from the DGE result tables of each cytokine.

    Parameters
    ----------
    dataset_type : str
        'SC' selects the 'cluster_labels' observable, anything else
        'tissue_type'; also forwarded to ``load_adata``.
    save_folder : str
        Root folder; results go to ``<save_folder>/<design>/<cytokine>``.
    df_keys : list
        Forwarded to the volcano plot routines.
    log : bool
        Forwarded to ``plot_violins``.
    dge_results_folder : str
        Folder that is searched recursively for ``*<cytokine>*all_genes.csv``.

    Returns
    -------
    None
    """
    # Determine name of cluster observable
    # (the original compared against an undefined name `dataset`)
    cluster_label = 'cluster_labels' if dataset_type == 'SC' else 'tissue_type'

    print("# ------ Load data ------ #")
    cytokines, _, cytoresps_dict = gene_lists.get_publication_cyto_resps()
    genes_to_highlight = gene_lists.highlight_genes()

    # 1. Load adata
    adata = load_adata(type_dataset=dataset_type, cluster_label=cluster_label)

    # 2. Read out only the spots selected by the leukocyte markers
    adata = get_sub_adata(adata=adata, gene=gene_lists.leukocyte_markers())

    # 3. Assign condition to spots
    adata, _ = exclude_cytokine_dp(adata=adata, cytoresps_dict=cytoresps_dict)

    # The result folder is scanned once, before any output is written,
    # because the list of files does not depend on the cytokine.
    all_csv_files = [
        file
        for path, _, _ in os.walk(dge_results_folder)
        for file in glob.glob(os.path.join(path, '*.csv'))
    ]

    for cyto in cytokines:
        # get observable of condition
        obs_label_condition = "_".join(['cytokine', cyto])
        genes_labeling = genes_to_highlight[cyto]
        pattern = "".join(['*', cyto, '*all_genes.csv'])

        for file in all_csv_files:
            if not fnmatch(file, pattern):
                continue

            # Read out only those driver and responder genes which are
            # specific for a cytokine; malformed lines are skipped.
            try:
                allgenes_df = pd.read_csv(file, on_bad_lines='warn')
            except TypeError:
                # pandas < 1.3 only knows the older keyword
                allgenes_df = pd.read_csv(file, error_bad_lines=False)
            # Remove column Unnamed: 0
            allgenes_df = allgenes_df.drop(['Unnamed: 0'], axis=1)

            # Check if column names and row names are unique
            print("Unique Genes Stratified Sampling:",
                  allgenes_df['gene_symbol'].is_unique)
            print("Unique Genes Stratified Sampling:",
                  allgenes_df.columns.is_unique)
            # remove duplicated rows
            allgenes_df = allgenes_df.loc[
                ~allgenes_df['gene_symbol'].duplicated(), :]

            # Name of used design function (taken from the file path)
            design = file.split(os.sep)[-4]
            # Name of used DGE Analysis method (taken from the file name)
            method = file.split(os.sep)[-1].split("_")[-4]

            # Create output folder
            output_folder = os.path.join(save_folder, design, cyto)
            os.makedirs(output_folder, exist_ok=True)

            allgenes_df = _write_dataframe(adata,
                                           df=allgenes_df,
                                           cytokine=cyto,
                                           observable=obs_label_condition,
                                           output_folder=output_folder,
                                           method=method)

            print("# ------ Volcano plot ------ #")
            # Interactive volcano plot
            plotly_interactive_volcano(
                df=allgenes_df,
                df_keys=df_keys,
                save_folder=output_folder,
                key="".join([method, "_", cyto, "+", "_vs_", cyto, "-"]),
                x_lab=r'log$_2$(FC)',
                y_lab=r'-log$_{10}$(pvalue)',
                log2fc_cut=1,
                pval_cut=0.05)

            volcano_plot(df=allgenes_df,
                         df_keys=df_keys,
                         cytokine=cyto,
                         adjust=True,
                         label_genes=genes_labeling,
                         title="_".join([method, cyto, "Volcano_plot_zoom"]),
                         save_folder=output_folder,
                         log2fc_cut=1.0,
                         threshold=0.05)

            print(
                "# ------ Violin plots of Novel and Golden Standard genes ------ #"
            )
            # Get expression level of genes of interest to create boxplots
            if isinstance(genes_labeling, dict):
                merged_genes = genes_labeling['Driver_genes'].copy()
                merged_genes.extend(genes_labeling['Responder_genes'])
            else:
                merged_genes = genes_labeling.copy()

            # First check if genes are in data set
            available_genes = list(set(adata.var.index) & set(merged_genes))
            available_genes.append(cyto)

            # Get counts for each gene of interest
            goi = pd.DataFrame()
            goi['Cyto+_vs_Cyto-'] = adata.obs[obs_label_condition].values
            for gene in available_genes:
                # the appended cytokine itself may be missing from the data
                if gene not in adata.var_names:
                    continue
                # Get counts
                adata_gene, _ = get_expression_values(adata=adata, gene=gene)

                # Sub-divide into cyto+ and cyto- group and read out counts
                spatial_adata_gene = create_obs_cytopos_cytoneg(
                    adata=adata_gene,
                    cyto=cyto,
                    gene=gene,
                    observable=obs_label_condition)

                goi[gene] = spatial_adata_gene.obs["_".join(
                    [gene, 'group'])].values

                # Visualise counts of gene of interest in a violin plot
                plot_violins(adata=spatial_adata_gene,
                             group=gene,
                             groupby=obs_label_condition,
                             output_folder=output_folder,
                             log=log)

            # Save counts of genes of interest
            goi.to_csv(
                os.path.join(output_folder,
                             "_".join([cyto, "Counts_Highlight_genes.csv"])))
def main(save_folder, spatial_adata):
    """Create the cluster and tissue-region plots for Suppl. Figure 2A-C.

    Parameters
    ----------
    save_folder : str
        Forwarded to the plotting routines.
    spatial_adata : annData
        Spatial data; spots whose tissue label is 'Unknown' are dropped.

    Returns
    -------
    None
    """
    tissue_obs = 'tissue_type'
    observable_kwargs = dict(task='cell_gene', label='celltype', condition=None)
    cytokine_palette = [
        "#ff7f00", "#e41a1c", 'darkgoldenrod', 'purple', "#377eb8", 'deeppink'
    ]

    # 1. Gene lists
    cytokines, _, cytoresps_dict = gene_lists.get_publication_cyto_resps()
    leukocyte_markers = gene_lists.leukocyte_markers()

    # 2. Discard every spot that has no tissue label
    spatial_adata = spatial_adata[spatial_adata.obs[tissue_obs] != 'Unknown']

    # 3. Derive observables, first for the cytokines, then for the markers
    for gene_set in (cytokines, leukocyte_markers):
        spatial_adata, _ = add_observables.convert_variable_to_observable(
            adata=spatial_adata, gene_names=gene_set, **observable_kwargs)

    # 4. Keep only the spots selected by the leukocyte marker genes
    adata_leukocytes = get_celltypes_data(spatial_adata,
                                          genes=leukocyte_markers)

    # 5. Add observable healthy_disease (after the leukocyte subset was taken)
    spatial_adata = add_observables.add_disease_healthy_obs(spatial_adata)

    # Suppl. Figure 2A
    visualise_clusters(adata=spatial_adata,
                       save_folder=save_folder,
                       key='healthy_disease',
                       title="Diagnoses")

    # Suppl. Figure 2C
    visualise_clusters(adata=adata_leukocytes,
                       save_folder=save_folder,
                       key='tissue_type',
                       title="Leukocytes_tissuelayers")

    # 6. Exclude double positive cytokine spots; the returned observable
    #    name is the one plotted below
    adata_leukocytes, cytokine_obs = exclude_cytokine_dp(
        adata=adata_leukocytes, cytoresps_dict=cytoresps_dict)

    # 7. Merge epidermis layers into epidermis and dermis depths into dermis
    adata_leukocytes = get_tissueregions(adata=adata_leukocytes,
                                         tissue_label=tissue_obs)

    # Suppl. Figure 2B
    plot_tissueregions_cyto(adata=adata_leukocytes,
                            obs_name=cytokine_obs,
                            title='Leukocytes_Cytokines',
                            save_folder=save_folder,
                            gene_colors=cytokine_palette)