def cluster_data_with_threshold(data,
                                min_val=None,
                                n=None,
                                mad=None,
                                min_over=2,
                                transform=None,
                                **kwargs):
    """
    Filter, optionally transform and subset the rows of `data`, then draw a clustermap.

    :param data: pandas DataFrame. Rows are the entries that get filtered and ranked.
    :param min_val: If not None (and `min_over` is not None), a row is kept only when
        strictly more than `min_over` of its values are strictly greater than `min_val`.
    :param n: If not None, only the `n` rows with the highest MAD are plotted.
        If None, no MAD-based selection happens and `mad` is returned exactly as supplied.
    :param mad: Optional pre-computed pandas Series of MAD values indexed by row label.
        Only used when `n` is not None. It must contain every row remaining in `data`;
        it may additionally contain labels that are not in `data` (those are ignored).
    :param min_over: See `min_val`.
    :param transform: Optional callable applied to the (filtered) data before ranking/plotting.
    :param kwargs: Forwarded to `clustering.plot_clustermap`.
    :return: Tuple `(cm, mad)`: the clustermap object and the MAD Series
        (sorted in descending order whenever `n` is not None).
    :raises AttributeError: If a supplied `mad` does not cover every row of `data`.
    """
    if min_val is not None and min_over is not None:
        idx = (data > min_val).sum(axis=1) > min_over
        data = data.loc[idx]

    if transform is not None:
        data = transform(data)

    if n is not None:
        if mad is None:
            # computed from `data` itself; presumably indexed like data's rows -- TODO confirm
            mad = transformations.median_absolute_deviation(data).sort_values(
                ascending=False)
            ranked = mad.index
        else:
            mad = mad.sort_values(ascending=False)
            if len(mad.index.intersection(data.index)) != data.shape[0]:
                raise AttributeError(
                    "If a pre-computed MAD is supplied, it must contain all required entries"
                )
            # A pre-computed MAD may also hold labels that are no longer in `data`
            # (e.g. rows dropped by the threshold filter above). Rank only the labels
            # that are actually present, otherwise the .loc lookup below requests
            # missing rows.
            ranked = mad.index[mad.index.isin(data.index)]

        data = data.loc[ranked[:n]]

    cm = clustering.plot_clustermap(data,
                                    cmap='RdBu_r',
                                    metric='correlation',
                                    **kwargs)
    # leave room at the bottom of the figure
    cm.gs.update(bottom=0.2)
    return cm, mad
def plot_clustermap(data, yugene=False, n_genes=N_GENES, yugene_resolve_ties=False, **kwargs):
    """
    Draw a clustermap of the `n_genes` rows of `data` with the highest MAD.

    :param data: pandas DataFrame; rows are ranked by MAD (computed with axis=1).
    :param yugene: If True, `data` is first passed through `process.yugene_transform`.
    :param n_genes: Number of top-ranked rows to include.
    :param yugene_resolve_ties: Forwarded as `resolve_ties` to the YuGene transform.
    :param kwargs: Forwarded to `clustering.plot_clustermap`; `cmap` defaults to 'RdBu_r'.
    :return: The clustermap object. Where possible it carries an extra `row_index`
        attribute holding the row labels in dendrogram order.
    """
    kwargs.setdefault('cmap', 'RdBu_r')

    if yugene:
        data = process.yugene_transform(data, resolve_ties=yugene_resolve_ties)

    # rank rows by MAD, largest first, and keep the labels of the top n_genes
    mad_ranked = transformations.median_absolute_deviation(data, axis=1)
    mad_ranked = mad_ranked.sort_values(ascending=False)
    top_mad = mad_ranked.iloc[:n_genes].index
    top_data = data.loc[top_mad]

    # column linkage: average linkage on correlation distance between columns
    col_linkage = hierarchy.linkage(
        top_data.transpose(),
        method='average',
        metric='correlation'
    )
    cg = clustering.plot_clustermap(top_data, col_linkage=col_linkage, **kwargs)

    x_labels = cg.ax_heatmap.xaxis.get_ticklabels()
    plt.setp(x_labels, rotation=90)
    cg.gs.update(bottom=0.2)

    # Expose the row labels in plotted (dendrogram) order for convenience.
    # Deliberately best-effort: some kwargs may mean that no row dendrogram is
    # available, in which case the attribute is simply not set.
    try:
        row_order = cg.dendrogram_row.reordered_ind
        cg.row_index = top_mad[row_order]
    except Exception:
        pass

    return cg
Exemplo n.º 3
0
        ax.figure.savefig(os.path.join(
            outdir, "pca_top%d_by_mad_with_names.png" % n_t),
                          dpi=200)

    row_colours = pd.DataFrame('gray', index=our_dat.columns, columns=[''])
    row_colours.loc[row_colours.index.str.contains(
        r'eNSC[0-9]med')] = '#66c2a5'
    row_colours.loc[row_colours.index.str.contains(
        r'eNSC[0-9]mouse')] = '#fc8d62'
    row_colours.loc[row_colours.index.str.contains(
        r'mDura.[AN0-9]*mouse')] = '#8da0cb'
    row_colours.loc[row_colours.index.str.contains(
        r'mDura.[AN0-9]*human')] = '#e78ac3'

    for n_t in n_gene_try:
        fname = "clustering_by_gene_corr_log_top%d_by_mad.{ext}" % n_t

        d = clustering.dendrogram_with_colours(our_dat.loc[mad.index[:n_t]],
                                               row_colours,
                                               fig_kws={'figsize': (10, 5.5)})
        d['fig'].savefig(os.path.join(outdir, fname.format(ext='png')),
                         dpi=200)

        cm = clustering.plot_clustermap(our_dat.loc[mad.index[:n_t]],
                                        cmap='RdBu_r',
                                        metric='correlation',
                                        col_colors=row_colours)
        cm.gs.update(bottom=0.2)
        cm.savefig(os.path.join(
            outdir, "clustermap_by_gene_corr_log_top%d_by_mad.png" % n_t),
                   dpi=200)
Exemplo n.º 4
0
    # reduce to significant and relevant
    keep_genes = cor_gene.index[(cor_gene.abs() > cross_corr_threshold)
                                & (pval_gene < alpha)]

    # remove MYC itself when reporting
    print "Having aggregated these by gene, %d are correlated with %s" % (len(
        keep_genes.drop(myc_gene)), myc_gene)

    # cluster using this representation of the data
    # force the order of the columns to match the correlation with MYC
    keep_genes = cor_gene.loc[keep_genes].sort_values(ascending=False).index
    cg = clustering.plot_clustermap(dat_corr_with_myc_aggr.loc[keep_genes],
                                    cmap='RdBu_r',
                                    metric='euclidean',
                                    method='ward',
                                    row_cluster=False,
                                    vmin=-4.5,
                                    vmax=4.5)
    cg.gs.update(bottom=0.1)

    jennie_list = [
        'MYC',
        'CXCL2',
        'CXCL1',
        'TNFAIP3',
        'IL8',
        'C17orf47',
        'RAET1L',
        'TEX14',
        'SERPINE1',
Exemplo n.º 5
0
        header=None).squeeze().str.decode('utf-8')
    ssgsea.index = ssgsea_pathway_names.reindex(
        ssgsea.index.str.replace('_', ' '))

    # heatmap: proportions for each patient
    # standardise across columns, because each cell type has different mean proportion
    rl = hc.linkage(xcell_prop.astype(float).transpose(),
                    method='average',
                    metric='euclidean')

    cg = clustering.plot_clustermap(
        xcell_prop.astype(float).transpose(),
        metric='euclidean',
        show_gene_labels=False,
        show_gene_clustering=True,
        cmap='YlOrRd',
        row_linkage=rl,
        z_score=1,
        vmin=-1.5,
        vmax=6.,
    )
    cg.gs.update(left=0.03, bottom=0.22, right=0.9)
    c_labels = [''] * len(cg.cax.get_yticks())
    c_labels[0] = 'Low'
    c_labels[-1] = 'High'
    cg.cax.set_yticklabels(c_labels)
    cg.cax.set_ylabel(
        'Normalised proportion',
        labelpad=-70)  # bit hacky, but this places the label correctly
    cg.savefig(os.path.join(outdir, "cell_proportion_cluster_by_patient.png"),
               dpi=200)
Exemplo n.º 6
0
    # relabel the FFPE samples
    idx = obj.meta.index.tolist()
    for k, v in hgic_consts.NH_ID_TO_PATIENT_ID_MAP.items():
        for i, t in enumerate(idx):
            if k.replace('-', '_') in t:
                idx[i] = "FFPE GBM%s" % v
    obj.meta.index = idx
    obj.data.columns = idx

    cpm = obj.data.divide(obj.data.sum(), axis=1)
    lcpm = np.log2((obj.data + 1).divide((obj.data + 1).sum(), axis=1))

    mad = transformations.median_absolute_deviation(lcpm).sort_values(
        ascending=False)

    cg = clustering.plot_clustermap(lcpm.loc[mad.index[:3000]], cmap='RdBu_r')
    cg.gs.update(bottom=0.15)
    cg.savefig(os.path.join(outdir, "cluster_by_top_3000_genes.png"), dpi=200)

    # load Verhaak signatures
    # manual amendments:
    # Classical. C14orf159 -> DGLUCY, KIAA0494 -> EFCAB14, LHFP -> LHFPL6
    # Proneural. HN1 -> JPT1, PAK7 -> PAK5, ZNF643 -> ZFP69B

    cl = [
        'PTPRA', 'ELOVL2', 'MLC1', 'SOX9', 'ARNTL', 'DENND2A', 'BBS1',
        'ABLIM1', 'PAX6', 'ZHX3', 'USP8', 'PLCG1', 'CDH4', 'RASGRP1', 'ACSBG1',
        'CST3', 'BCKDHB', 'LHFPL6', 'VAV3', 'ACSL3', 'EYA2', 'SEPT11',
        'SLC4A4', 'SLC20A2', 'DGLUCY', 'CTNND1', 'ZFHX4', 'SPRY2', 'ZNF45',
        'NCOA1', 'PLCE1', 'DTNA', 'POLRMT', 'SALL1', 'TYK2', 'TJP1', 'MEOX2',
        'FGFR3', 'STXBP3', 'GRIK1', 'GATM', 'UPF1', 'NPEPL1', 'EFCAB14',
Exemplo n.º 7
0
        the_data = rnaseq_obj.data.loc[:,
                                       rnaseq_obj.data.columns.str.
                                       contains("GBM%s" % pid)]
        the_aggr = the_data.mean(axis=1)
        dat_gbm_aggr.loc[:, pid] = the_aggr

    # select genes
    g = set(venn_set['111111'])
    for x in sets_full.values() + sets_partial.values():
        for k in x:
            g.update(venn_set[k])

    the_data = dat_gbm_aggr.loc[g]

    # remove any rows that have no variation
    the_data = the_data.loc[~(the_data.diff(axis=1).iloc[:,
                                                         1:] == 0).all(axis=1)]
    the_data = np.log2(the_data + 1)

    cg = clustering.plot_clustermap(the_data,
                                    cmap='RdBu_r',
                                    vmax=12.,
                                    figsize=(3.6, 8))
    cg.gs.update(bottom=0.1)
    cg.savefig(os.path.join(outdir,
                            "clustermap_gbm_by_subgroup_gene_sets.png"),
               dpi=200)
    cg.savefig(os.path.join(outdir,
                            "clustermap_gbm_by_subgroup_gene_sets.tiff"),
               dpi=200)
def plot_clustermap(obj,
                    quantile_norm,
                    method='average',
                    metric='correlation',
                    n_gene_by_mad=5000,
                    n_gene_for_heatmap=500,
                    fmin=0.05,
                    fmax=0.95,
                    eps=0.01,
                    cell_line_colours=None):
    """
    Plot a clustermap of log2-transformed data with cell type / study colour bars and legend.

    :param obj: Object exposing `.data` (values to plot) and `.meta` (with `type` and
        `batch` columns, both read here).
    :param quantile_norm: If not None, passed as `method` to
        `transformations.quantile_normalisation`, applied after the log transform.
    :param method: Linkage method passed to `hc.linkage` for the column linkage.
    :param metric: Distance metric passed to `hc.linkage` for the column linkage.
    :param n_gene_by_mad: Number of top-MAD rows used to compute the column linkage.
    :param n_gene_for_heatmap: Number of top-MAD rows shown in the heatmap.
    :param fmin: Fraction (0-1) into the sorted heatmap values used to pick the lower
        colour limit; 0.5 is subtracted from the value found.
    :param fmax: Fraction (0-1) into the sorted heatmap values used to pick the upper
        colour limit; 0.5 is added to the value found.
    :param eps: Offset added to the data before taking log2.
    :param cell_line_colours: Dict mapping cell type label to colour. Must include the
        key 'iPSC (this study)'. Defaults to a built-in palette.
    :return: The clustermap object returned by `clustering.plot_clustermap`.
    """
    if cell_line_colours is None:
        cell_line_colours = {
            'FB': '#fff89e',  # yellow
            'GBM (this study)': '#e6e6e6',  # light gray
            'GBM': '#4d4d4d',  # dark grey
            'ESC': '#ff7777',  # light red
            'iPSC': '#990000',  # dark red
            'iPSC (this study)': '#fdc086',  # orange
            'NSC': '#006600',  # dark green
            'iNSC (this study)': '#7fc97f',  # green
        }

    the_dat = np.log2(obj.data + eps)

    if quantile_norm is not None:
        the_dat = transformations.quantile_normalisation(the_dat,
                                                         method=quantile_norm)
    the_mad = transformations.median_absolute_deviation(the_dat).sort_values(
        ascending=False)
    cc, st, leg_dict = construct_colour_array_legend_studies(obj.meta)

    # linkage: computed on a (typically larger) set of rows than the heatmap shows
    lkg = hc.linkage(
        the_dat.loc[the_mad.index[:n_gene_by_mad]].transpose(),
        method=method,
        metric=metric,
    )

    # ref line colours
    for k, v in cell_line_colours.items():
        cc.loc[obj.meta.type == k, 'Cell type'] = v
    # our line colours: iPSC samples whose batch contains 'wtchg' override the reference colour
    is_our_ipsc = obj.meta.batch.str.contains('wtchg') & (obj.meta.type == 'iPSC')
    cc.loc[is_our_ipsc, 'Cell type'] = cell_line_colours['iPSC (this study)']

    # get appropriate clims
    # (the original referenced an undefined name here instead of the parameter)
    the_dat = the_dat.loc[the_mad.index[:n_gene_for_heatmap]]
    the_dat_flat = np.sort(the_dat.values.flatten())
    n_val = len(the_dat_flat)
    # clamp so that a fraction of exactly 1.0 selects the last value rather than overrunning
    vmin = the_dat_flat[min(int(n_val * fmin), n_val - 1)] - 0.5
    vmax = the_dat_flat[min(int(n_val * fmax), n_val - 1)] + 0.5

    # the_dat has already been reduced to the heatmap rows above
    gc = clustering.plot_clustermap(
        the_dat,
        cmap='RdBu_r',
        col_linkage=lkg,
        col_colors=cc,
        vmin=vmin,
        vmax=vmax,
    )

    # template shared by every legend entry; copied per entry so entries stay independent
    leg_entry = {
        'class': 'patch',
        'edgecolor': 'k',
        'linewidth': 1.,
    }
    leg_dict2 = collections.OrderedDict()
    leg_dict2['Cell type'] = collections.OrderedDict()

    # only list cell types that actually occur in the metadata
    types_present = obj.meta.type.unique()
    for k in sorted(cell_line_colours):
        if k.replace(' (this study)', '') in types_present:
            leg_dict2['Cell type'][k] = dict(leg_entry,
                                             facecolor=cell_line_colours[k])

    leg_dict2['Study'] = {}
    for k, v in leg_dict['Study'].items():
        leg_dict2['Study'][k] = dict(leg_entry, facecolor=v)

    common.add_custom_legend(gc.ax_heatmap,
                             leg_dict2,
                             loc_outside=True,
                             fontsize=14)
    format_clustermap(gc)

    return gc
    ax.figure.set_size_inches(5.9, 4.8)
    ax.figure.subplots_adjust(right=0.8, left=0.12, bottom=0.1, top=0.98)
    ax.figure.savefig(os.path.join(outdir, "pca_our_samples.png"), dpi=200)

    # clustermap: just our samples
    colour_bar = pd.DataFrame(treatment_colour['Rheb KO'],
                              index=dat.columns,
                              columns=[''])
    colour_bar.loc[colour_subgroups == 'WT'] = treatment_colour['WT']

    this_mad = transformations.median_absolute_deviation(log_dat).sort_values(
        ascending=False)
    this_log_dat = log_dat.loc[this_mad.index[:n_by_mad]]
    cm = clustering.plot_clustermap(this_log_dat,
                                    cmap='RdYlBu_r',
                                    metric='correlation',
                                    col_colors=colour_bar,
                                    vmin=-2,
                                    vmax=2)
    cm.fig.set_size_inches(5, 8.4)
    cm.gs.update(bottom=0.15, right=0.98)
    cm.savefig(os.path.join(outdir, "clustermap_our_samples.png"), dpi=200)

    # DE 3 vs 3
    dat = filter.filter_by_cpm(obj_star.data, min_cpm=min_cpm, min_n_samples=2)

    the_groups = obj_star.meta.treatment.str.replace(
        'Rheb KO', 'Rheb_KO')  # group names must be valid in R
    the_comparison = ('Rheb_KO', 'WT')
    de_res = differential_expression.run_one_de(dat,
                                                the_groups,
                                                the_comparison,
Exemplo n.º 10
0
    # norm = colors.Normalize(vmin=-1, vmax=0.)
    # sm = plt.cm.ScalarMappable(norm=norm, cmap=cmap)
    # vals = [colors.rgb2hex(sm.to_rgba(t)) for t in base_n]

    norm = colors.Normalize(vmin=base.min(), vmax=base.max())
    sm = plt.cm.ScalarMappable(cmap=plt.cm.gray_r, norm=norm)

    vals = [colors.rgb2hex(sm.to_rgba(t)) for t in base]

    col_colours = pd.DataFrame(vals, index=the_data.columns, columns=['MYC'])

    cg = clustering.plot_clustermap(
        dat_corr_with_myc_aggr.loc[keep_genes_sorted],
        cmap='RdBu_r',
        metric='euclidean',
        method='ward',
        row_cluster=False,
        col_colors=col_colours,
        vmin=-8,
        vmax=8)
    cg.fig.set_size_inches((7., 7.))
    cg.cax.set_ylabel("Gene expression")
    cg.cax.yaxis.set_label_coords(-.7, 0.5)
    cg.gs.update(bottom=0.12, left=0.04, top=0.97, right=0.93)
    cg.savefig(os.path.join(outdir, 'myc_genes_clustermap.png'), dpi=200)
    cg.savefig(os.path.join(outdir, 'myc_genes_clustermap.tiff'), dpi=200)

    # does the clustering partition by MYC expression level?
    dend = cg.dendrogram_col.calculate_dendrogram()
    lkg = cg.dendrogram_col.linkage
Exemplo n.º 11
0
    # clustermap
    colour_bar = pd.DataFrame(treatment_colour['Rheb KO'],
                              index=dat.columns,
                              columns=[''])
    colour_bar.loc[meta.treatment == 'WT'] = treatment_colour['WT']

    this_mad = transformations.median_absolute_deviation(log_dat).sort_values(
        ascending=False)
    this_log_dat = log_dat.loc[this_mad.index[:n_by_mad]]

    # version 1: cluster rows
    cm = clustering.plot_clustermap(
        this_log_dat,
        cmap='RdYlBu_r',
        metric='correlation',
        col_colors=colour_bar,
        vmin=-2,
        vmax=2,
    )

    leg_dict = {
        'fl/fl': {
            'class': 'patch',
            'edgecolor': 'none',
            'facecolor': treatment_colour['WT'],
        },
        r'$\Delta$/$\Delta$': {
            'class': 'patch',
            'edgecolor': 'none',
            'facecolor': treatment_colour['Rheb KO'],
        },
Exemplo n.º 12
0
           'Cell type'] = cell_line_colours['iNSC (this study)']
    cc.loc[the_obj.meta.batch.str.contains('wtchg') &
           (the_obj.meta.type == 'iPSC'),
           'Cell type'] = cell_line_colours['iPSC (this study)']

    # get appropriate clims
    the_dat = the_dat.loc[the_mad.index[:n_for_heatmap]]
    the_dat_flat = np.sort(the_dat.values.flatten())
    fmin = 0.05
    fmax = 0.95
    vmin = the_dat_flat[int(len(the_dat_flat) * fmin)] - 0.5
    vmax = the_dat_flat[int(len(the_dat_flat) * fmax)] + 0.5

    gc = clustering.plot_clustermap(the_dat.loc[the_mad.index[:n_for_heatmap]],
                                    cmap='RdBu_r',
                                    col_linkage=dend['linkage'],
                                    col_colors=cc,
                                    vmin=vmin,
                                    vmax=vmax)

    leg_entry = {
        'class': 'patch',
        'edgecolor': 'k',
        'linewidth': 1.,
    }
    leg_dict2 = collections.OrderedDict()
    leg_dict2['Cell type'] = collections.OrderedDict()

    for k in sorted(cell_line_colours):
        if k.replace(' (this study)', '') in the_obj.meta.type.unique():
            leg_dict2['Cell type'][k] = dict(leg_entry)
            leg_dict2['Cell type'][k].update(
Exemplo n.º 13
0
    ix = obj2.meta.type == 'ESC'
    obj2.filter_samples(ix)

    dend = plot_dendrogram([obj1, obj2], qn_method=quantile_norm, n_by_mad=n_gene_by_mad)
    dend['fig'].savefig(os.path.join(outdir, "cluster_ipsc_esc.png"), dpi=200)

    # 3. iPSC, ESC, Ruiz signature (only)
    the_obj = loader.MultipleBatchLoader([obj1, obj2])
    dat_r_z = pd.DataFrame(np.log2(the_obj.data + eps))
    dat_r_z = dat_r_z.reindex(gene_sign_ens.values).dropna()
    for r in dat_r_z.index:
        dat_r_z.loc[r] = zscore(dat_r_z.loc[r])

    dat_r_z.index = gene_sign_ens.index[gene_sign_ens.isin(dat_r_z.index)]

    cg = clustering.plot_clustermap(dat_r_z, show_gene_labels=True, cmap='RdBu_r')
    cg.gs.update(bottom=0.2)
    cg.savefig(os.path.join(outdir, "clustermap_ruiz_ipsc_esc_ztrans.png"), dpi=200)

    # 4. HipSci, iPSC, ESC, FB
    obj1 = copy(obj)
    ix = obj1.meta.type.isin(['iPSC', 'FB'])
    obj1.filter_samples(ix)

    dend = plot_dendrogram([obj1, ref_obj, hip_obj], qn_method=quantile_norm, n_by_mad=n_gene_by_mad)
    dend['fig'].savefig(os.path.join(outdir, "cluster_ipsc_esc_fb_with_hipsci%d.png" % n_hipsci), dpi=200)

    # 5. HipSci, iPSC, ESC
    obj1 = copy(obj)
    ix = obj1.meta.type.isin(['iPSC'])
    obj1.filter_samples(ix)
Exemplo n.º 14
0
                "nsc_correlation_dendrogram_vsttransform_top%d.pdf" % NGENE))

        cg.gs.update(bottom=0.3, right=0.7)
        cg.fig.savefig(os.path.join(
            outdir,
            "nsc_correlation_clustermap_vsttransform_top%d.png" % NGENE),
                       dpi=200)
        cg.fig.savefig(
            os.path.join(
                outdir,
                "nsc_correlation_clustermap_vsttransform_top%d.pdf" % NGENE))

        cg = clustering.plot_clustermap(
            log_nsc_data.loc[mad_log_nsc_srt.index[:NGENE]],
            show_gene_labels=False,
            rotate_xticklabels=True,
            cmap='RdBu_r',
            metric='correlation',
        )
        cg.gs.update(bottom=0.2)
        cg.fig.savefig(os.path.join(
            outdir,
            "nsc_expression_clustermap_logtransform_top%d.png" % NGENE),
                       dpi=200)
        cg.fig.savefig(
            os.path.join(
                outdir,
                "nsc_expression_clustermap_logtransform_top%d.pdf" % NGENE))
        cg = clustering.plot_clustermap(
            vst_nsc_data.loc[mad_vst_nsc_srt.index[:NGENE]],
            show_gene_labels=False,
Exemplo n.º 15
0
        "Unknown": 'gray'
    }
    wang_cmap = {
        'PN': consts.SUBGROUP_SET_COLOURS['RTK I partial'],
        'CL': consts.SUBGROUP_SET_COLOURS['RTK II partial'],
        'MS': consts.SUBGROUP_SET_COLOURS['MES partial'],
        'Unknown': 'grey'
    }
    row_colours.insert(0, 'Sturm', sturm_class.map(sturm_cmap))
    row_colours.insert(0, 'Verhaak', wang_class.map(wang_cmap))

    cg = clustering.plot_clustermap(xcell_tcga.astype(float).transpose(),
                                    metric='euclidean',
                                    show_gene_labels=False,
                                    show_gene_clustering=True,
                                    cmap='YlOrRd',
                                    row_linkage=rl,
                                    z_score=1,
                                    vmin=-1.5,
                                    vmax=6.,
                                    row_colors=row_colours)
    cg.gs.update(left=0.03, bottom=0.22, right=0.9)
    c_labels = [''] * len(cg.cax.get_yticks())
    c_labels[0] = 'Low'
    c_labels[-1] = 'High'
    cg.cax.set_yticklabels(c_labels)
    cg.cax.set_ylabel(
        'Normalised proportion',
        labelpad=-70)  # bit hacky, but this places the label correctly
    cg.savefig(os.path.join(outdir, "cell_proportion_cluster_by_patient.png"),
               dpi=200)
    cg.savefig(os.path.join(outdir, "cell_proportion_cluster_by_patient.tiff"),
Exemplo n.º 16
0
    cv.plot.barh(color='k', ax=ax)
    fig.subplots_adjust(bottom=0.07, left=0.4, right=0.98, top=0.98)
    ax.set_xlabel('CV across samples')
    fig.savefig(os.path.join(outdir, "cv_across_samples.png"), dpi=200)
    fig.savefig(os.path.join(outdir, "cv_across_samples.tiff"), dpi=200)

    # heatmap: proportions for each patient
    # standardise across columns, because each cell type has different mean proportion
    rl = hc.linkage(df.astype(float).transpose(),
                    method='average',
                    metric='euclidean')

    cg = clustering.plot_clustermap(df.astype(float).transpose(),
                                    metric='euclidean',
                                    show_gene_labels=True,
                                    show_gene_clustering=True,
                                    cmap='YlOrRd',
                                    row_linkage=rl,
                                    z_score=1,
                                    figsize=(5.5, 5.5))
    # cg.gs.update(left=0.03, bottom=0.22, right=0.9)
    cg.gs.update(left=0.1, bottom=0.4, right=0.9, top=0.93)
    cg.cax.set_yticklabels(['Low', '', '', '', 'High'])
    cg.cax.set_ylabel(
        'Normalised\nproportion',
        labelpad=-70)  # bit hacky, but this places the label correctly
    cg.savefig(os.path.join(outdir, "cell_proportion_cluster_by_patient.png"),
               dpi=200)
    cg.savefig(os.path.join(outdir, "cell_proportion_cluster_by_patient.tiff"),
               dpi=200)
    cg.savefig(os.path.join(outdir, "cell_proportion_cluster_by_patient.pdf"),
               dpi=200)
        'class': 'patch',
        'edgecolor': 'k',
        'linewidth': 1.,
    }

    lkg = plt_dict[clust_n_ftr]['linkage']
    leg_dict = collections.OrderedDict()
    for k in sorted(cell_line_colours):
        if cell_line_colours[k] in row_colours_all.values:
            leg_dict[k] = dict(leg_entry)
            leg_dict[k].update({'facecolor': cell_line_colours[k]})

    cm = clustering.plot_clustermap(this_dat,
                                    cmap='RdYlBu_r',
                                    metric='correlation',
                                    col_colors=row_colours_all,
                                    col_linkage=lkg,
                                    vmin=-10,
                                    vmax=10)
    cm.fig.set_size_inches((10.9, 8.))

    common.add_custom_legend(cm.ax_heatmap,
                             leg_dict,
                             loc_outside=True,
                             fontsize=14)
    cm.gs.update(bottom=0.3, right=0.79, left=0.01)

    cm.savefig(os.path.join(outdir, "clustermap_ipsc_esc_nsc_fb.png"), dpi=200)
    cm.savefig(os.path.join(outdir, "clustermap_ipsc_esc_nsc_fb.tiff"),
               dpi=200)
Exemplo n.º 18
0
    # lnk = hc.linkage(dist)
    # dend = clustering.dendrogram_with_colours(
    #     dat,
    #     cc,
    #     linkage=lnk,
    #     vertical=True,
    #     legend_labels=leg_dict,
    #     fig_kws={'figsize': [14, 6]}
    # )

    # Pearson correlation distance
    dend = clustering.dendrogram_with_colours(dat, cc, vertical=True, legend_labels=leg_dict, fig_kws={'figsize': [14, 6]})

    # Pearson with a limited number of probes
    # dend = clustering.dendrogram_with_colours(dat.loc[mad.index[:5000]], cc, vertical=True, legend_labels=leg_dict, fig_kws={'figsize': [14, 6]})

    dend['fig'].savefig(os.path.join(outdir, "cluster_ipsc_esc_fb_all_probes.png"), dpi=200)

    # similar, but clustermap (dendrogram + heatmap)
    gc = clustering.plot_clustermap(
        dat.loc[mad.index[:5000]],
        cmap='RdBu_r',
        col_linkage=dend['linkage'],
        col_colors=cc
    )
    clustering.add_legend(leg_dict, gc.ax_heatmap, loc='right')
    gc.gs.update(bottom=0.2, right=0.82)

    gc.savefig(os.path.join(outdir, "clustermap_ipsc_esc_fb_all_probes.png"), dpi=200)