def allvlmc_vs_allfb__mrkr():
    """
    Plot row-by-row cosine similarity between the "FB*" rows of `df_expr_bh`
    and the "VLMC" rows of `df_expr_ab`, restricted to the `mrkr.index` columns.

    The figure is a grid of greyscale panels: one panel row per `celltype`
    label of the selected `df_expr_bh` rows and one panel column per
    `cell_type_alias_label` of the selected `df_expr_ab` rows. Each panel
    shows the corresponding sub-block of the similarity matrix.

    Reads the module-level `df_expr_ab`, `df_expr_bh`, `df_meta_ab`,
    `df_meta_bh`, `mrkr` and `out_dir`. Writes `<whatsmyname()>.png` into
    `out_dir` and returns nothing.
    """
    # Keep only the `mrkr.index` columns that are fully present in each table.
    ab = df_expr_ab.reindex(mrkr.index, axis=1).dropna(axis=1)
    bh = df_expr_bh.reindex(mrkr.index, axis=1).dropna(axis=1)
    assert set(ab.columns) == set(bh.columns)

    # cosine_similarity pairs columns by position, so enforce the same order.
    ab = ab[bh.columns]

    # Select rows from the column-restricted frames. Selecting from
    # df_expr_ab / df_expr_bh here would silently discard the restriction above.
    ab = ab[df_meta_ab.subclass_label == "VLMC"]
    bh = bh[df_meta_bh.celltype.str.startswith("FB")]

    # Order rows by label so that each label forms a contiguous block.
    ab_labels = df_meta_ab.reindex(ab.index).cell_type_alias_label.sort_values()
    bh_labels = df_meta_bh.reindex(bh.index).celltype.sort_values()
    ab = ab.reindex(ab_labels.index)
    bh = bh.reindex(bh_labels.index)

    S = pd.DataFrame(index=bh.index, columns=ab.index, data=cosine_similarity(bh, ab))

    # label -> Index of the rows carrying that label
    njj = ab.index.groupby(ab_labels.to_list())
    mii = bh.index.groupby(bh_labels.to_list())

    fig: plt.Figure
    # squeeze=False keeps AX two-dimensional even with a single row or column.
    (fig, AX) = plt.subplots(len(mii), len(njj), squeeze=False)

    for (i, ii) in enumerate(mii.values()):
        for (j, jj) in enumerate(njj.values()):
            ax = AX[i, j]
            ax.imshow(S.loc[ii, jj], aspect='auto', cmap='Greys')
            ax.axis('off')

    # plt.xlabel / plt.ylabel would act on the last axes only, which is
    # switched off, so the labels are placed on the figure itself.
    fig.text(0.5, 0.02, "Mouse-WCH-2020", ha='center', va='bottom')
    fig.text(0.02, 0.5, "Betsholtz-2018", ha='left', va='center', rotation='vertical')

    fig.savefig(out_dir / f"{whatsmyname()}.png")

    # Release the figure; pyplot otherwise keeps it alive.
    plt.close(fig)
def avg_cos_sim__mrkr():
    """
    Draw a heatmap of the cosine similarity between label-averaged rows of
    `df_expr_bh` (y-axis, "Betsholtz-2018") and `df_expr_ab` (x-axis,
    "Mouse-WCH-2020"), using only the `mrkr.index` columns.

    Rows are averaged per `celltype` (bh) and per `cell_type_alias_label` (ab)
    before comparison. The image is saved as `<whatsmyname()>.png` in `out_dir`.
    """
    # DEBUG hint: for a quick run, subsample the inputs beforehand, e.g.
    #   df_expr_bh.sample(n=9, random_state=43)
    #   df_expr_ab.sample(n=8, random_state=43)

    marker_cols = mrkr.index
    expr_ab = df_expr_ab.reindex(marker_cols, axis=1).dropna(axis=1)
    expr_bh = df_expr_bh.reindex(marker_cols, axis=1).dropna(axis=1)
    assert set(expr_ab.columns) == set(expr_bh.columns)

    # One label per row, in row order
    labels_bh = list(df_meta_bh.reindex(expr_bh.index).celltype)
    labels_ab = list(df_meta_ab.reindex(expr_ab.index).cell_type_alias_label)

    # Average profile per label
    mean_bh = expr_bh.groupby(by=labels_bh).mean()
    mean_ab = expr_ab.groupby(by=labels_ab).mean()

    similarity = pd.DataFrame(
        cosine_similarity(mean_bh, mean_ab),
        index=mean_bh.index,
        columns=mean_ab.index,
    )
    (n_rows, n_cols) = similarity.shape

    with Plox() as px:
        px.a.imshow(similarity, cmap="Greys")

        px.a.set_xticks(range(n_cols))
        px.a.set_xticklabels(similarity.columns, rotation=90)
        px.a.set_xlabel("Mouse-WCH-2020")

        px.a.set_yticks(range(n_rows))
        px.a.set_yticklabels(similarity.index)
        px.a.set_ylabel("Betsholtz-2018")

        px.f.savefig(out_dir / f"{whatsmyname()}.png")
def avg_cos_sim__mrkr():
    """
    Draw a heatmap of the cosine similarity between label-averaged expression
    in `df_expr_cb` (y-axis, labelled with `dataset_name`) and `df_expr_ab`
    (x-axis, "Mouse-WCH-2020"), using a fixed list of marker genes.

    Both tables are indexed by gene on the rows here: the marker rows are
    selected with `.loc` and the result is transposed before averaging per
    `celltype` (cb) and per `cell_type_alias_label` (ab). The image is saved
    as `<whatsmyname()>.png` in `out_dir`.
    """
    # Marker genes from Betsholtz-2018, figure 1c:
    # https://www.nature.com/articles/nature25739/figures/1
    markers_by_type = {
        'PC': "Pdgfrb Cspg4 Anpep Rgs5 Cd248 Abcc9 Vtn S1pr3",
        'SMC': "Acta2 Tagln Myh11 Myl9 Mylk Sncg Cnn1 Pln",
        'MG': "Csf1r Cd68 Cd53 Cd48 Cd84 C1qa Fcgr1",  # removed Emr1 (missing in AB)
        'FB': "Pdgfra Lum Dcn Col3a1 Col5a1 Col8a2 Col12a1 Mmp2",
        'OL': "Mobp Plp1 Mog Cldn11 Mag Gjc2 Mal Cnp",
        'EC': "Pecam1 Kdr Flt1 Tie1 Tek Icam2 Podxl Ptprb",
        'AC': "Aldh1l1 Fgfr3 Slc4a4 Slc6a11 Slc7a10 Mlc1 Slc1a3 Cldn10",
    }

    # Flatten to a sorted list of unique gene names
    unique_genes = set()
    for gene_string in markers_by_type.values():
        unique_genes.update(gene_string.split(' '))
    marker_genes = sorted(unique_genes)

    # DEBUG hint: for a quick run, subsample the inputs beforehand, e.g.
    #   df_expr_cb.sample(n=9, random_state=43)
    #   df_expr_ab.sample(n=8, random_state=43)

    expr_ab = df_expr_ab.loc[marker_genes].T
    expr_cb = df_expr_cb.loc[marker_genes].T
    assert set(expr_ab.columns) == set(expr_cb.columns)

    # One label per row, in row order
    labels_cb = list(df_meta_cb.reindex(expr_cb.index).celltype)
    labels_ab = list(df_meta_ab.reindex(expr_ab.index).cell_type_alias_label)

    # Average profile per label
    mean_cb = expr_cb.groupby(by=labels_cb).mean()
    mean_ab = expr_ab.groupby(by=labels_ab).mean()

    similarity = pd.DataFrame(
        cosine_similarity(mean_cb, mean_ab),
        index=mean_cb.index,
        columns=mean_ab.index,
    )
    (n_rows, n_cols) = similarity.shape

    with Plox() as px:
        px.a.imshow(similarity, cmap="Greys")

        px.a.set_xticks(range(n_cols))
        px.a.set_xticklabels(similarity.columns, rotation=90)
        px.a.set_xlabel("Mouse-WCH-2020")

        px.a.set_yticks(range(n_rows))
        px.a.set_yticklabels(similarity.index)
        px.a.set_ylabel(dataset_name)

        px.f.savefig(out_dir / f"{whatsmyname()}.png")
def cluster(df_expr):
    """
    Embed the rows of `df_expr` with t-SNE and attach their cell type.

    Returns a DataFrame indexed like `df_expr` with columns "x" and "y" (the
    embedding from `TSNE(random_state=43)`) and "celltype" (looked up in the
    module-level `df_meta` by row index).
    """
    embedding = TSNE(random_state=43).fit_transform(df_expr)
    coords = pd.DataFrame(embedding, index=df_expr.index, columns=["x", "y"])

    # Positional assignment: a plain list avoids index alignment.
    celltypes = list(df_meta.reindex(coords.index).celltype)
    return coords.assign(celltype=celltypes)
from sklearn.manifold import TSNE out_dir = mkdir(Path(__file__).with_suffix('')) # df_expr = pd.DataFrame(np.random.RandomState(0).random(size=(30, 5))) from z_sources import df_expr, df_meta, df_mrkr # # DEBUG -- subset samples # df_expr = df_expr.sample(n=100, random_state=43) # # Subset to marker genes # df_expr = df_expr[df_mrkr.index] # Order samples df_meta = df_meta.reindex(df_expr.index) styles = pd.DataFrame(index=['marker', 'c'], data={ 'PC': ('s', 'red'), 'vSMC': ('s', 'green'), 'aaSMC': ('o', 'green'), 'aSMC': ('^', 'green'), 'MG': ('s', 'gray'), 'FB1': ('^', 'purple'), 'FB2': ('s', 'violet'), 'OL': ('s', 'brown'), 'EC1': ('s', 'cyan'), 'EC2': ('o', 'cyan'), 'EC3': ('^', 'cyan'), 'vEC': ('s', 'blue'),
def histograms():
    """
    For each selected gene, plot kernel density estimates of its log1p
    expression, one curve per cell-type label, and save one PNG per gene.

    Rows come from `df_expr_ab` where `df_meta_ab.subclass_label == "VLMC"`
    (grouped by `cell_type_alias_label`) and from `df_expr_bh` where
    `df_meta_bh.celltype` starts with "FB" (grouped by `celltype`). Genes
    missing from a table (NaN after reindexing) are dropped.

    Groups with fewer than two distinct values are skipped, because a density
    cannot be estimated for them. Each figure is written to
    `out_dir / "hist_<gene>.png"`. Returns nothing.
    """
    genes = [
        "Trpm3", "mt-Co1", "mt-Co3", "Nnat", "Ptgds", "Adam12", "Alcam",  # Up early
        "Itih5", "Malat1", "Zbtb20", "Spp1", "Col15a1", "Ece1", "Cemip",  # Up late
    ]

    ab = df_expr_ab.reindex(genes, axis=1).dropna(axis=1)
    bh = df_expr_bh.reindex(genes, axis=1).dropna(axis=1)
    assert list(ab.columns) == list(bh.columns)

    # Norm1 normalization (disabled)
    # ab = ab.div(ab.sum(axis=1), axis=0)
    # bh = bh.div(bh.sum(axis=1), axis=0)

    # log1p transform
    ab = np.log1p(ab)
    bh = np.log1p(bh)

    ab = ab[df_meta_ab.subclass_label == "VLMC"]
    bh = bh[df_meta_bh.celltype.str.startswith("FB")]

    genes = sorted(ab.columns)

    colors = {
        'FB1': 'purple',
        'FB2': 'violet',
        '374_VLMC': "C0",
        '375_VLMC': "C1",
        '376_VLMC': "C2",
    }

    # The group labels do not depend on the gene, so compute them once.
    ab_labels = df_meta_ab.reindex(ab.index).cell_type_alias_label
    bh_labels = df_meta_bh.reindex(bh.index).celltype

    for g in genes:
        expr: pd.Series
        with Plox() as px:
            for grp in (ab[g].groupby(ab_labels), bh[g].groupby(bh_labels)):
                for (label, expr) in grp:
                    # gaussian_kde needs non-degenerate data: a single value
                    # or a constant group (including all zeros) has a singular
                    # covariance and raises instead of producing a curve.
                    if expr.nunique() < 2:
                        continue

                    f = gaussian_kde(expr)
                    xx = np.linspace(0, expr.max() * 1.5, 100)
                    # Labels without a dedicated colour use the default cycle
                    # (color=None) rather than raising KeyError.
                    px.a.plot(xx, f(xx), label=f"{label} ({len(expr)})", color=colors.get(label))

            px.a.set_xlabel("log1p(count)")
            px.a.legend()
            px.a.set_yticks([])
            px.f.savefig(out_dir / f"hist_{g}.png")