# Fill one row of the pairwise distance matrix.
# NOTE(review): `i` is bound outside this view (presumably by an enclosing
# loop over rows) -- confirm the indentation against the full file.
for j in range(len(cm)):
    dd = dist_fun(cm_np[i, :], cm_np[j, :])
    dist_mat[i, j] = dd

# Label both axes of the distance matrix with the index of `cm`.
dist_mat = pd.DataFrame(dist_mat, columns=cm.index, index=cm.index)

# need counts, distances, and num_umi
hs = hotspot.Hotspot(
    counts,
    distances=dist_mat,
    umi_counts=num_umi,
)
hs.create_knn_graph(
    n_neighbors=n_neighbors,
    neighborhood_factor=3,
    weighted_graph=False,
)

# %% Plot scores for all modules

# Every module label except -1, in ascending order.
modules_to_compute = sorted(m for m in modules.unique() if m != -1)

# Get the scores
module_scores = {}
for module in modules_to_compute:
    # Entries of `modules.index` whose label equals the current module.
    module_genes = modules.index[modules == module]

    scores = hotspot.modules.compute_scores(
        counts.loc[module_genes].values,
        model,
        num_umi.values,
        hs.neighbors.values,
        hs.weights.values,
    )
    module_scores[module] = scores

# One column per module; rows are labelled with the columns of `counts`.
module_scores = pd.DataFrame(module_scores)
module_scores.index = counts.columns
# %% Load Modules

# `Cluster` column of the modules table, keyed by the table's first column.
modules = pd.read_table("../../CD4_w_protein/hotspot/modules.txt", index_col=0).Cluster

# Raw values of the linkage table (the file has no header row).
Z = pd.read_table("../../CD4_w_protein/hotspot/linkage.txt", header=None).values

# %% Modules to gene sets

# Map each module label (skipping -1) to the set of upper-cased names that
# `ens_map` gives for the module's index entries.
gene_sets = {}
for i in modules.unique():
    if i == -1:
        continue

    genes = {ens_map[ens_id].upper() for ens_id in modules[modules == i].index}
    gene_sets[i] = genes

# Upper-cased `ens_map` names for every entry in `hs_results.index`.
all_genes = {ens_map[ens_id].upper() for ens_id in hs_results.index}

# %% Load GO sets

from gene_enrich import load_gene_set_gmt

# go_sets = load_gene_set_gmt("/data/yosef2/users/david.detomaso/Signatures/GO/GO_biological_process.gmt")
go_sets = load_gene_set_gmt(
    "/data/yosef2/users/david.detomaso/Signatures/Enrichr/GO_Biological_Process_2015.txt"
)  # The Enrichr one is better!
# `Cluster` column of the modules table, keyed by the table's first column.
modules = pd.read_table(
    "../../CD4_w_protein/hotspot/modules.txt",
    index_col=0,
).Cluster

# Raw values of the linkage table (the file has no header row).
Z = pd.read_table(
    "../../CD4_w_protein/hotspot/linkage.txt",
    header=None,
).values

# Module score and UMAP tables, each indexed by its first column.
scores = pd.read_table(
    "../../CD4_w_protein/hotspot/module_scores.txt.gz",
    index_col=0,
)
proj = pd.read_table(
    "../../CD4_w_protein/umap/umap_hvg.txt",
    index_col=0,
)

# %% Plot Modules

# Give each module label a "tab20" colour, wrapping around when the labels
# outnumber the palette; the -1 label is then overridden with white.
colors = list(plt.get_cmap("tab20").colors)
module_colors = {
    label: colors[(label - 1) % len(colors)]
    for label in modules.unique()
}
module_colors[-1] = '#ffffff'

# Viridis colour mapper whose normalisation clips values to [0, 0.05].
cm = ScalarMappable(cmap="viridis", norm=Normalize(0, 0.05, clip=True))

# NOTE(review): the colours are listed in the order of `modules` but labelled
# with `z_scores.index`; this assumes both have the same length and ordering
# -- confirm.
row_colors1 = pd.Series(
    [module_colors[label] for label in modules],
    index=z_scores.index,
)
row_colors = pd.DataFrame({"Modules": row_colors1})

# Flattened z-scores and symmetric colour limits.
zvals = z_scores.values.ravel()
vmax, vmin = 8, -8