Пример #1
0
def allvlmc_vs_allfb__mrkr():
    """Plot cell-by-cell cosine similarity of FB cells vs VLMC cells on marker genes.

    Rows are cells of `df_expr_bh` whose `celltype` starts with "FB"; columns
    are cells of `df_expr_ab` whose `subclass_label` is "VLMC". Both tables
    are restricted to the genes in `mrkr.index` that survive `dropna` in each
    table. One grey-scale panel is drawn per (FB cell type, VLMC alias label)
    pair and the figure is saved as a PNG in `out_dir`.
    """
    # Restrict both tables to the marker genes available in each of them.
    ab = df_expr_ab.reindex(mrkr.index, axis=1).dropna(axis=1)
    bh = df_expr_bh.reindex(mrkr.index, axis=1).dropna(axis=1)
    assert set(ab.columns) == set(bh.columns)

    # Select cells from the marker-restricted tables. Slicing the full
    # df_expr_* tables here would silently discard the gene restriction above
    # and compare the cells on every gene instead.
    ab = ab[df_meta_ab.subclass_label == "VLMC"]
    bh = bh[df_meta_bh.celltype.str.startswith("FB")]

    # Order the cells by their label so that each group is contiguous.
    ab = ab.reindex(
        df_meta_ab.reindex(ab.index).cell_type_alias_label.sort_values().index)
    bh = bh.reindex(df_meta_bh.reindex(bh.index).celltype.sort_values().index)

    S = pd.DataFrame(index=bh.index,
                     columns=ab.index,
                     data=cosine_similarity(bh, ab))

    # Map each label to the index of the cells carrying it.
    njj = ab.index.groupby(
        df_meta_ab.reindex(ab.index).cell_type_alias_label.to_list())
    mii = bh.index.groupby(df_meta_bh.reindex(bh.index).celltype.to_list())

    # squeeze=False keeps AX two-dimensional even when there is only one
    # group along an axis, so AX[i, j] below is always valid.
    (fig, AX) = plt.subplots(len(mii), len(njj), squeeze=False)

    for (i, ii) in enumerate(mii.values()):
        for (j, jj) in enumerate(njj.values()):
            ax = AX[i, j]
            ax.imshow(S.loc[ii, jj], aspect='auto', cmap='Greys')
            ax.axis('off')

    # plt.xlabel / plt.ylabel act on the current axes only, and that one has
    # its axis switched off, so the labels were never drawn. Use figure-level
    # text instead.
    fig.text(0.5, 0.04, "Mouse-WCH-2020", ha='center', va='center')
    fig.text(0.04, 0.5, "Betsholtz-2018", ha='center', va='center',
             rotation='vertical')

    fig.savefig(out_dir / f"{whatsmyname()}.png")
    # Release the figure; pyplot otherwise keeps it alive.
    plt.close(fig)
Пример #2
0
def avg_cos_sim__mrkr():
    """Plot cosine similarity between per-label mean marker-gene profiles.

    Both expression tables are restricted to the genes of `mrkr.index`,
    averaged per cell label (`celltype` for the `bh` table,
    `cell_type_alias_label` for the `ab` table), and the cosine similarity
    between every pair of mean profiles is shown as a grey-scale image that
    is saved as a PNG in `out_dir`.
    """
    marker_genes = mrkr.index

    # Marker genes only; columns containing missing values are dropped.
    ab = df_expr_ab.reindex(marker_genes, axis=1).dropna(axis=1)
    bh = df_expr_bh.reindex(marker_genes, axis=1).dropna(axis=1)
    assert set(ab.columns) == set(bh.columns)

    # Cell label of every row, in row order.
    labels_bh = list(df_meta_bh.reindex(bh.index).celltype)
    labels_ab = list(df_meta_ab.reindex(ab.index).cell_type_alias_label)

    # Mean expression profile per label.
    mean_bh = bh.groupby(by=labels_bh).mean()
    mean_ab = ab.groupby(by=labels_ab).mean()

    similarity = pd.DataFrame(
        cosine_similarity(mean_bh, mean_ab),
        index=mean_bh.index,
        columns=mean_ab.index,
    )

    with Plox() as px:
        px.a.imshow(similarity, cmap="Greys")

        # One tick per `ab` label along x.
        px.a.set_xticks(range(similarity.shape[1]))
        px.a.set_xticklabels(similarity.columns, rotation=90)
        px.a.set_xlabel("Mouse-WCH-2020")

        # One tick per `bh` label along y.
        px.a.set_yticks(range(similarity.shape[0]))
        px.a.set_yticklabels(similarity.index)
        px.a.set_ylabel("Betsholtz-2018")

        px.f.savefig(out_dir / f"{whatsmyname()}.png")
Пример #3
0
def avg_cos_sim__mrkr():
    """Plot cosine similarity between per-label mean profiles on fixed markers.

    The gene list is hard-coded below. Rows of `df_expr_ab` / `df_expr_cb`
    are selected by gene name and transposed, averaged per cell label, and
    the cosine similarity between every pair of mean profiles is drawn as a
    grey-scale image that is saved as a PNG in `out_dir`.
    """
    # Marker genes per cell type, from Betsholtz-2018, figure 1c:
    # https://www.nature.com/articles/nature25739/figures/1
    markers_by_type = {
        'PC': "Pdgfrb Cspg4 Anpep Rgs5 Cd248 Abcc9 Vtn S1pr3",
        'SMC': "Acta2 Tagln Myh11 Myl9 Mylk Sncg Cnn1 Pln",
        # Emr1 was removed from MG because it is missing in AB.
        'MG': "Csf1r Cd68 Cd53 Cd48 Cd84 C1qa Fcgr1",
        'FB': "Pdgfra Lum Dcn Col3a1 Col5a1 Col8a2 Col12a1 Mmp2",
        'OL': "Mobp Plp1 Mog Cldn11 Mag Gjc2 Mal Cnp",
        'EC': "Pecam1 Kdr Flt1 Tie1 Tek Icam2 Podxl Ptprb",
        'AC': "Aldh1l1 Fgfr3 Slc4a4 Slc6a11 Slc7a10 Mlc1 Slc1a3 Cldn10",
    }

    # Flatten into one sorted list of unique gene names.
    gene_names = sorted({
        gene
        for panel in markers_by_type.values()
        for gene in panel.split(' ')
    })

    # Select the marker rows, then transpose so that genes become columns.
    ab = df_expr_ab.loc[gene_names].T
    cb = df_expr_cb.loc[gene_names].T
    assert set(ab.columns) == set(cb.columns)

    # Cell label of every row, in row order.
    labels_cb = list(df_meta_cb.reindex(cb.index).celltype)
    labels_ab = list(df_meta_ab.reindex(ab.index).cell_type_alias_label)

    # Mean expression profile per label.
    mean_cb = cb.groupby(by=labels_cb).mean()
    mean_ab = ab.groupby(by=labels_ab).mean()

    similarity = pd.DataFrame(
        cosine_similarity(mean_cb, mean_ab),
        index=mean_cb.index,
        columns=mean_ab.index,
    )

    with Plox() as px:
        px.a.imshow(similarity, cmap="Greys")

        # One tick per `ab` label along x.
        px.a.set_xticks(range(similarity.shape[1]))
        px.a.set_xticklabels(similarity.columns, rotation=90)
        px.a.set_xlabel("Mouse-WCH-2020")

        # One tick per `cb` label along y.
        px.a.set_yticks(range(similarity.shape[0]))
        px.a.set_yticklabels(similarity.index)
        px.a.set_ylabel(dataset_name)

        px.f.savefig(out_dir / f"{whatsmyname()}.png")
Пример #4
0
def cluster(df_expr):
    """Embed the rows of `df_expr` with t-SNE and attach their cell type.

    Returns a DataFrame indexed like `df_expr` with the embedding in columns
    "x" and "y" and a "celltype" column looked up in the module-level
    `df_meta`.
    """
    # Fixed seed so that repeated runs use the same random state.
    coords = TSNE(random_state=43).fit_transform(df_expr)
    embedded = pd.DataFrame(coords, index=df_expr.index, columns=["x", "y"])

    # Pass the labels as a plain list so they are assigned by position.
    celltypes = list(df_meta.reindex(embedded.index).celltype)
    return embedded.assign(celltype=celltypes)
Пример #5
0
from sklearn.manifold import TSNE

# Output directory: this script's path with the file suffix stripped.
# NOTE(review): `mkdir` is a helper defined elsewhere; presumably it creates
# the directory and returns its path -- confirm.
out_dir = mkdir(Path(__file__).with_suffix(''))

# Synthetic stand-in for the expression table (disabled):
# df_expr = pd.DataFrame(np.random.RandomState(0).random(size=(30, 5)))

from z_sources import df_expr, df_meta, df_mrkr

# # DEBUG -- subset samples
# df_expr = df_expr.sample(n=100, random_state=43)

# # Subset to marker genes
# df_expr = df_expr[df_mrkr.index]

# Order samples: align the metadata rows with the rows of the expression
# table, so that both can be used side by side by position.
df_meta = df_meta.reindex(df_expr.index)

styles = pd.DataFrame(index=['marker', 'c'],
                      data={
                          'PC': ('s', 'red'),
                          'vSMC': ('s', 'green'),
                          'aaSMC': ('o', 'green'),
                          'aSMC': ('^', 'green'),
                          'MG': ('s', 'gray'),
                          'FB1': ('^', 'purple'),
                          'FB2': ('s', 'violet'),
                          'OL': ('s', 'brown'),
                          'EC1': ('s', 'cyan'),
                          'EC2': ('o', 'cyan'),
                          'EC3': ('^', 'cyan'),
                          'vEC': ('s', 'blue'),
Пример #6
0
def histograms():
    """Plot per-gene expression densities for VLMC and FB cell groups.

    For each gene of a fixed list that is available in both expression
    tables, a Gaussian kernel density estimate of the log1p-transformed
    values is drawn for every cell group: VLMC cells of the `ab` table grouped
    by `cell_type_alias_label`, and "FB*" cells of the `bh` table grouped by
    `celltype`. One PNG per gene is written to `out_dir` as `hist_<gene>.png`.

    Groups with fewer than two distinct values are skipped, because a kernel
    density estimate is not defined for them.
    """
    genes = [
        "Trpm3",
        "mt-Co1",
        "mt-Co3",
        "Nnat",
        "Ptgds",
        "Adam12",
        "Alcam",  # Up early
        "Itih5",
        "Malat1",
        "Zbtb20",
        "Spp1",
        "Col15a1",
        "Ece1",
        "Cemip",  # Up late
    ]

    # Keep only the requested genes that are present without missing values.
    ab = df_expr_ab.reindex(genes, axis=1).dropna(axis=1)
    bh = df_expr_bh.reindex(genes, axis=1).dropna(axis=1)
    assert list(ab.columns) == list(bh.columns)

    # Norm1 normalization (disabled)
    # ab = ab.div(ab.sum(axis=1), axis=0)
    # bh = bh.div(bh.sum(axis=1), axis=0)

    # log1p trafo
    ab = np.log1p(ab)
    bh = np.log1p(bh)

    # Restrict to the cell populations being compared.
    ab = ab[df_meta_ab.subclass_label == "VLMC"]
    bh = bh[df_meta_bh.celltype.str.startswith("FB")]
    assert list(ab.columns) == list(bh.columns)

    genes = sorted(ab.columns)

    colors = {
        'FB1': 'purple',
        'FB2': 'violet',
        '374_VLMC': "C0",
        '375_VLMC': "C1",
        '376_VLMC': "C2",
    }

    # The group labels do not depend on the gene, so look them up once.
    labels_ab = df_meta_ab.reindex(ab.index).cell_type_alias_label
    labels_bh = df_meta_bh.reindex(bh.index).celltype

    for g in genes:
        with Plox() as px:
            grps = [
                ab[g].groupby(labels_ab),
                bh[g].groupby(labels_bh),
            ]

            for grp in grps:
                for (label, expr) in grp:
                    # gaussian_kde needs at least two distinct values; this
                    # also skips groups where the gene is all zero.
                    if expr.nunique() < 2:
                        continue

                    f = gaussian_kde(expr)
                    xx = np.linspace(0, expr.max() * 1.5, 100)
                    # Labels without a fixed color fall back to the default
                    # color cycle (color=None) instead of raising KeyError.
                    px.a.plot(xx,
                              f(xx),
                              label=f"{label} ({len(expr)})",
                              color=colors.get(label))

            px.a.set_xlabel("log1p(count)")
            px.a.legend()
            px.a.set_yticks([])
            px.f.savefig(out_dir / f"hist_{g}.png")