Пример #1
0
    for j in range(cm.shape[0]):

        dd = dist_fun(cm_np[i, :], cm_np[j, :])
        dist_mat[i, j] = dd

# Label the pairwise distance array on both axes with the index of `cm`.
dist_mat = pd.DataFrame(
    dist_mat,
    index=cm.index,
    columns=cm.index,
)

# Hotspot needs three inputs: counts, distances, and num_umi.
hs = hotspot.Hotspot(
    counts,
    distances=dist_mat,
    umi_counts=num_umi,
)

# Build an unweighted K-nearest-neighbors graph from the distances.
hs.create_knn_graph(
    weighted_graph=False,
    n_neighbors=n_neighbors,
    neighborhood_factor=3,
)

# %% Plot scores for all modules

# Every distinct module label except -1, in ascending order.
# NOTE(review): -1 is presumably the "unassigned" label - confirm.
modules_to_compute = sorted(x for x in modules.unique() if x != -1)

# Get the scores: one entry per module, keyed by module label.
module_scores = {}
for module in modules_to_compute:
    # Index labels of the entries assigned to this module.
    module_genes = modules.index[modules == module]

    scores = hotspot.modules.compute_scores(
        counts.loc[module_genes].values,
        model,
        num_umi.values,
        hs.neighbors.values,
        hs.weights.values,
    )

    module_scores[module] = scores

# One column per module; rows are relabelled with the columns of `counts`.
module_scores = pd.DataFrame(module_scores)
module_scores.index = counts.columns
Пример #2
0
# %% Load Modules

# Module assignments: the "Cluster" column, indexed by the file's first column.
modules = pd.read_table("../../CD4_w_protein/hotspot/modules.txt",
                        index_col=0)["Cluster"]

# Header-less table loaded as a plain array.
# NOTE(review): presumably a hierarchical-clustering linkage matrix, going by
# the file name - confirm.
Z = pd.read_table("../../CD4_w_protein/hotspot/linkage.txt",
                  header=None).values

# %% Modules to gene sets

# Map each module label (except -1) to the set of upper-cased names that
# `ens_map` gives for the module's index entries.
gene_sets = {}
for i in modules.unique():
    if i != -1:
        genes = {
            ens_map[gene_id].upper()
            for gene_id in modules.index[modules == i]
        }
        gene_sets[i] = genes

# Same name mapping applied to every entry in the index of `hs_results`.
all_genes = {ens_map[gene_id].upper() for gene_id in hs_results.index}

# %% Load GO sets

# Loader for gene-set collections, from the project's `gene_enrich` module.
from gene_enrich import load_gene_set_gmt

# Two candidate GO Biological Process collections. The first is kept commented
# out as the alternative; the Enrichr 2015 file below is the one in use.
# NOTE(review): the active file has a .txt extension but goes through the GMT
# loader - presumably it is GMT-formatted; confirm.
# go_sets = load_gene_set_gmt("/data/yosef2/users/david.detomaso/Signatures/GO/GO_biological_process.gmt")
go_sets = load_gene_set_gmt("/data/yosef2/users/david.detomaso/Signatures/Enrichr/GO_Biological_Process_2015.txt")

# Author's note: the Enrichr collection was judged the better of the two.
Пример #3
0
# Module assignments: the "Cluster" column, indexed by the file's first column.
modules = pd.read_table(
    "../../CD4_w_protein/hotspot/modules.txt",
    index_col=0,
)["Cluster"]

# Header-less table loaded as a plain array.
# NOTE(review): presumably a linkage matrix, going by the file name - confirm.
Z = pd.read_table(
    "../../CD4_w_protein/hotspot/linkage.txt",
    header=None,
).values

# Module scores table (gzip-compressed), indexed by its first column.
scores = pd.read_table(
    "../../CD4_w_protein/hotspot/module_scores.txt.gz",
    index_col=0,
)

# Table from the umap directory, indexed by its first column.
# NOTE(review): presumably 2-D UMAP coordinates - confirm.
proj = pd.read_table(
    "../../CD4_w_protein/umap/umap_hvg.txt",
    index_col=0,
)

# %% Plot Modules

# One "tab20" color per module label; labels are shifted by one and wrapped
# with a modulo so they cycle when there are more modules than colors.
colors = list(plt.get_cmap("tab20").colors)
module_colors = {
    i: colors[(i - 1) % len(colors)]
    for i in modules.unique()
}
# Label -1 always gets white, overriding the palette color assigned above.
module_colors[-1] = '#ffffff'

# Viridis color mapper for values clipped to the range [0, 0.05].
cm = ScalarMappable(
    norm=Normalize(vmin=0, vmax=0.05, clip=True),
    cmap="viridis",
)

# Per-row color annotation.
# NOTE(review): the colors follow the order of `modules` but are labelled with
# `z_scores.index`; this assumes both share the same order - confirm.
row_colors1 = pd.Series(
    [module_colors[i] for i in modules], index=z_scores.index
)

row_colors = pd.DataFrame({"Modules": row_colors1})

# Flattened z-scores and the symmetric limits used for them.
zvals = z_scores.values.ravel()
vmin, vmax = -8, 8