Пример #1
0
def plot_mvmm(mvmm, inches=8, save_dir=None):
    """
    Plot the loss history and the estimated Pi matrix of a fitted model.

    Parameters
    ----------
    mvmm:
        Fitted model. Its ``opt_data_['history']['loss_val']`` and
        ``weights_mat_`` attributes are read.

    inches: float
        Side length used for the (square) Pi figure.

    save_dir: None, str
        When given, the directory is created if it does not exist and each
        figure is handed to ``save_fig`` as ``loss_history.png`` and
        ``Pi_est.png`` inside it. When None nothing is saved.
    """
    saving = save_dir is not None
    if saving:
        os.makedirs(save_dir, exist_ok=True)

    # TODO: maybe add Pi start
    history = mvmm.opt_data_['history']

    # --- loss history ---
    plot_loss_history(history['loss_val'],
                      loss_name='Observed data negative log-likelihood')
    if saving:
        save_fig(join(save_dir, 'loss_history.png'))

    # --- Pi estimate ---
    plt.figure(figsize=(inches, inches))
    plot_Pi(mvmm.weights_mat_)
    plt.title("Estimated Pi")
    if saving:
        save_fig(join(save_dir, 'Pi_est.png'))
Пример #2
0
def plot_model_selection(mm_gs,
                         save_dir=None,
                         name_stub=None,
                         title='',
                         inches=8):
    """
    Plot the model selection score against the number of components and
    mark the selected number of components with a vertical line.

    Parameters
    ----------
    mm_gs:
        Fitted grid search object. Reads ``param_grid['n_components']``,
        ``select_metric`` and ``model_sel_scores_[select_metric]``.

    save_dir: None, str
        When given, the directory is created if needed and the figure is
        passed to ``save_fig`` as ``<select_metric>_model_selection.png``
        (optionally prefixed, see ``name_stub``).

    name_stub: None, str
        Optional prefix for the saved file name, joined with an underscore.

    title: str
        Figure title.

    inches: float
        Side length of the (square) figure.
    """
    n_comps = mm_gs.param_grid['n_components']
    select_metric = mm_gs.select_metric
    scores = mm_gs.model_sel_scores_[select_metric]

    # idxmin/idxmax return an index *label*, while n_comps is indexed by
    # *position*. Dropping the index makes label == position, so the selected
    # entry is correct even when the scores are not indexed 0..n-1. This
    # matches the positional pairing used by plt.plot(n_comps, scores) below.
    positional_scores = scores.reset_index(drop=True)
    if MEASURE_MIN_GOOD[select_metric]:
        best_pos = positional_scores.idxmin()
    else:
        best_pos = positional_scores.idxmax()
    sel_n_comp = n_comps[best_pos]

    plt.figure(figsize=(inches, inches))
    plt.plot(n_comps, scores, marker='.')
    plt.axvline(sel_n_comp,
                color='black',
                ls='--',
                label='Est. number of components = {}'.format(sel_n_comp))
    set_xaxis_int_ticks()
    plt.legend()
    plt.title(title)

    plt.ylabel(select_metric)
    plt.xlabel("Number of components")

    if save_dir is not None:
        # make sure the output directory exists, as the other plotting
        # helpers do before saving
        os.makedirs(save_dir, exist_ok=True)

        fname = '{}_model_selection.png'.format(select_metric)
        if name_stub is not None:
            fname = name_stub + '_' + fname

        save_fig(os.path.join(save_dir, fname))
Пример #3
0
                         ephys_raw=ephys_raw)

# One label per entry of y_cnts, numbered from 1
# ('cluster_1', 'cluster_2', ...).
clust_labels = [
    'cluster_{}'.format(cl_idx + 1) for cl_idx in range(len(y_cnts))
]

# plot top several clusters
# (n_top_clust is forwarded as n_to_show)
plot_top_clust_ephys_curves(cluster_super_means,
                            y_cnts=y_cnts,
                            overall_means=super_data_means,
                            overall_stds=super_data_stds,
                            clust_labels=clust_labels,
                            n_to_show=n_top_clust,
                            inches=inches)

save_fig(join(ephys_viz_dir, 'ephys_curves_top_clust.png'))

# plot each (non-trivial) cluster
# NOTE(review): the earlier cutoff (>= 5, kept commented out below) was
# relaxed to >= 0. Presumably y_cnts holds non-negative counts, in which
# case every cluster is selected -- confirm this is intended.
# non_trivial_clusters = y_cnts[y_cnts >= 5].index.values
non_trivial_clusters = y_cnts[y_cnts >= 0].index.values
save_dir = make_and_get_dir(ephys_viz_dir, 'cluster_curves')
for cl_idx in non_trivial_clusters:

    # cl_idx comes from the index of y_cnts and is used as a position into
    # clust_labels -- assumes that index is 0-based integers, TODO confirm
    label = clust_labels[cl_idx]

    # collect this cluster's entry from every item of cluster_super_means
    values = {}
    for name in cluster_super_means.keys():
        values[name] = cluster_super_means[name][cl_idx]

    # figure width scales with n_datasets
    plt.figure(figsize=(2 * n_datasets * inches, inches))
Пример #4
0
    ###############
    # joint level #
    ###############

    # cluster prediction data
    # each listed entry of `out` is written to <save_dir>/<key>.csv
    for k in ['joint_summary', 'y_pred_joint', 'joint_clust_best_samples']:
        out[k].to_csv(join(save_dir, '{}.csv'.format(k)))

    # metadata comparisons
    # only handled when `out` has a 'joint_comparison' entry: the object is
    # dumped to disk and its own .plot() output is saved as a figure
    if 'joint_comparison' in out.keys():
        dump(out['joint_comparison'], join(save_dir, 'metadata_comparison'))

        plt.figure(figsize=(2 * inches, 2 * inches))
        out['joint_comparison'].plot()
        save_fig(join(save_dir, 'metadata_comparison.png'))

    # survival
    # only handled when `out` has a 'joint_survival' entry; that entry
    # provides the data frame to plot ('df') and a p-value ('pval') which is
    # shown in the title with three decimals
    if 'joint_survival' in out.keys():
        pval = out['joint_survival']['pval']
        plt.figure(figsize=(inches, inches))
        plot_survival(df=out['joint_survival']['df'], cat_col='cluster')
        plt.xlabel("Time (days)")
        plt.ylabel("Survival")
        plt.title('{} vs. joint label, p={:1.3f}'.format(args.event_col, pval))
        save_fig(join(save_dir, 'joint_survival.png'))
        # TODO: label survival variable!

    ##############
    # View level #
    ##############
Пример #5
0
# Keep only the selected marker columns and write that subset to disk.
transc_sel_markers = transc[select_markers]
transc_sel_markers.to_csv(
    join(pro_data_dir, 'transcriptomic_select_markers.csv'))

# PCA; the number of components is chosen by the 'rmt_threshold' option
# with the 'dg' threshold method
pca = PCA(n_components='rmt_threshold',
          rank_sel_kws={'thresh_method': 'dg'})
pca.fit(transc_sel_markers.values)

# Save the unnormalized scores, one column per component (pc_1, pc_2, ...),
# indexed like the input data.
UD = pca.unnorm_scores_
pc_cols = ['pc_{}'.format(pc_num) for pc_num in range(1, UD.shape[1] + 1)]
UD = pd.DataFrame(UD, index=transc_sel_markers.index, columns=pc_cols)
UD.index.name = transc_sel_markers.index.name
UD.name = 'transcriptomic_select_markers'
UD.to_csv(join(pro_data_dir,
               '{}_pca_feats.csv'.format('transcriptomic_select_markers')))

# Scree plot with a red vertical line at the selected number of components.
plt.figure(figsize=(8, 8))
scree_plot(pca.all_svals_, color='black')
sel_rank = pca.n_components_
plt.axvline(sel_rank, label='{}'.format(sel_rank), color='red')
plt.legend()
save_fig(join(MouseETPaths().results_dir,
              '{}_rank_selection.png'.format('transcriptomic_select_markers')))
Пример #6
0
                            best_row['n_blocks_est'],
                            best_row['n_view_comp']))

    # save the model selection table
    mvmm_sel_df.to_csv(os.path.join(bd_sel_dir, 'bd_mvmm_model_sel.csv'))

    # only the metric chosen on the command line is plotted
    # metrics2compute = mvmm_results['fit_data'][0]['models']['bd_mvmm'].metrics2compute
    metrics2compute = [args.select_metric]

    # one model selection figure per metric, grouped by 'n_blocks_est'
    for metric in metrics2compute:
        plt.figure(figsize=(inches, inches))
        # plot_bd_mvmm_model_sel(mvmm_sel_df, select_metric=metric)
        plot_mvmm_model_selection(mvmm_sel_df, group_var='n_blocks_est',
                                  group_var_label="Number of blocks",
                                  select_metric=metric)
        # plt.title('Multiview models')
        save_fig(join(bd_sel_dir, 'bd_mvmm_model_sel_{}.png'.format(metric)))

    # fitting history
    # if isinstance(estimator, TwoStage):

    # When the start stage was allowed at least one step, its diagnostics go
    # to a separate 'start' directory and the final stage gets 'final';
    # otherwise a single 'bd_mvmm' directory is used for the final stage.
    if estimator.start_.max_n_steps > 0:
        bd_start_save_dir = make_and_get_dir(opt_diag_dir,
                                             'bd_mvmm', 'start')
        bd_final_save_dir = make_and_get_dir(opt_diag_dir,
                                             'bd_mvmm', 'final')

        plot_mvmm(estimator.start_, inches=inches,
                  save_dir=bd_start_save_dir)

    else:
        bd_final_save_dir = make_and_get_dir(opt_diag_dir, 'bd_mvmm')
Пример #7
0
    # Horizontal dashed lines: the DG (blue) and MPE (orange) thresholds.
    # NOTE(review): the legend labels spell "theshold"; likely a typo for
    # "threshold" in the displayed text -- confirm before changing.
    plt.axhline(thresh_dg, label='DG theshold ({:1.2f})'.format(thresh_dg),
                color='blue', ls='--')
    plt.axhline(thresh_mpe, label='MPE theshold ({:1.2f})'.format(thresh_mpe),
                color='orange', ls='--')

    # Vertical solid lines: the rank estimates, colored to match the
    # corresponding threshold line.
    plt.axvline(rank_est_dg,
                label='DG rank estimate ({})'.format(rank_est_dg),
                color='blue')
    plt.axvline(rank_est_mpe,
                label='MPE rank estimate ({})'.format(rank_est_mpe),
                color='orange')

    plt.legend()
    plt.title(k)
    # one scree plot file per value of k
    save_fig(join(diagnostics_dir, '{}_scree_plot.png'.format(k)))

    # # save scree plot
    # plt.figure(figsize=(10, 10))
    # scree_plot(pca.all_svals_, color='black')
    # plt.axvline(pca.n_components_,
    #             label='{}'.format(pca.n_components_),
    #             color='red')
    # plt.legend()
    # plt.ylim(0)
    # save_fig(join(diagnostics_dir, '{}_rank_selection.png'.format(k)))

    # # for bi cross validation save MSE curve
    # if pca.n_components == 'bi_cv':
    #     plt.figure(figsize=(10, 10))
    #     mse = pca.rank_sel_out_['errors']
Пример #8
0
def plot_log_pen_mvmm(mvmm, inches=8, save_dir=None):
    """
    Diagnostic plots for a fitted log penalized MVMM.

    Draws, in order: the initial weights (only when the optimization history
    has an 'init_params' entry), the estimated weights next to a masked copy
    of them, the spectrum returned by ``eigh_Lsym_bp`` for the estimate (and
    for the initial value when available), the observed negative
    log-likelihood history and the penalized loss history.

    Parameters
    ----------
    mvmm:
        Fitted model. Reads ``weights_mat_`` and ``opt_data_['history']``.

    inches: float
        Base figure size; the two-panel weights figure is twice as wide.

    save_dir: None, str
        When given, the directory is created if needed and every figure is
        passed to ``save_fig`` with a file name inside it. When None nothing
        is saved.
    """
    if save_dir is not None:
        os.makedirs(save_dir, exist_ok=True)

    def _maybe_save(fname):
        # hand the current figure to save_fig only when a directory was given
        if save_dir is not None:
            save_fig(join(save_dir, fname))

    Pi = mvmm.weights_mat_
    zero_thresh = 1e-10  # not sure if we need this

    summary, Pi_comm = community_summary(Pi, zero_thresh=zero_thresh)
    Pi_symlap_spec = eigh_Lsym_bp(Pi)[0]

    history = mvmm.opt_data_['history']

    # NOTE(review): the check is on 'init_params' but the value is read from
    # history['weights'] -- kept as is (original TODO: check this).
    Pi_init = None
    if 'init_params' in history:
        Pi_init = history['weights'].reshape(Pi.shape)
        Pi_init_symlap_spec = eigh_Lsym_bp(Pi_init)[0]

    obs_nll = history['obs_nll']
    loss_vals = history['loss_val']

    # --- initial weights ---
    if Pi_init is not None:
        plt.figure(figsize=(inches, inches))
        plot_Pi(Pi_init)
        plt.title('weights initial value')
        _maybe_save('weights_init.png')

    # --- estimated weights: raw (left) and masked (right) ---
    plt.figure(figsize=(2 * inches, inches))

    plt.subplot(1, 2, 1)
    plot_Pi(Pi)
    plt.title('weights estimate, n_blocks={}'.format(summary['n_communities']))

    plt.subplot(1, 2, 2)
    plot_Pi(Pi, mask=Pi_comm < zero_thresh)
    plt.title('weights estimate, block diagonal perm')

    _maybe_save('weights_est.png')

    # --- spectrum of the weights ---
    plt.figure(figsize=(inches, inches))
    idxs = np.arange(1, len(Pi_symlap_spec) + 1)
    plt.plot(idxs, Pi_symlap_spec, marker='.', label='Estimate')
    if Pi_init is not None:
        plt.plot(idxs, Pi_init_symlap_spec, marker='.', label="Initial")
    plt.title('weights estimate spectrum')
    plt.ylim(0)
    plt.legend()
    _maybe_save('weights_spectrum.png')

    # --- observed data NLL over the whole optimization ---
    plt.figure(figsize=(inches, inches))
    plot_loss_history(obs_nll, loss_name="Obs NLL")
    _maybe_save('obs_nll.png')

    # --- penalized loss over the whole optimization ---
    plt.figure(figsize=(inches, inches))
    plot_loss_history(loss_vals, loss_name="log penalized obs nll")
    _maybe_save('loss_vals.png')
Пример #9
0
def plot_bd_mvmm(mvmm, inches=8, save_dir=None):
    """
    Diagnostic plots for a fitted block diagonal MVMM.

    Draws, in order:
      - the initial BD weights
      - the estimated BD weights next to a masked copy of them
      - the spectrums (from ``eigh_Lsym_bp``) of the estimate and the initial
        value
      - the number of steps in each adaptive stage (when 'raw_eval_sum' was
        recorded)
      - the observed NLL over the entire adaptive path
      - log10 of 'raw_eval_sum' over the entire path, with a grey line at the
        end of each stage (when recorded)
      - the loss history of each adaptive segment
      - the fine tuning observed NLL history (when fine tuning data exists)

    Parameters
    ----------
    mvmm:
        Fitted model. Reads ``bd_weights_``, ``zero_thresh`` and
        ``opt_data_`` (keys 'adpt_opt_data' and 'ft_opt_data').

    inches: float
        Base figure size; the two-panel weights figure is twice as wide.

    save_dir: None, str
        When given, the directory (and a 'segments' sub-directory) is created
        and every figure is passed to ``save_fig``. When None nothing is
        saved and the model info is printed instead.
    """
    saving = save_dir is not None
    if saving:
        os.makedirs(save_dir, exist_ok=True)

    def _maybe_save(fname):
        # hand the current figure to save_fig only when a directory was given
        if saving:
            save_fig(join(save_dir, fname))

    info = get_bd_mvmm_info(mvmm)
    if not saving:
        # TODO: when saving, write info to save_dir instead of dropping it
        print(info)

    # BD weight estimate
    bd_weights = mvmm.bd_weights_
    zero_thresh = mvmm.zero_thresh
    summary, Pi_comm = community_summary(bd_weights, zero_thresh=zero_thresh)
    bd_weights_symlap_spec = eigh_Lsym_bp(bd_weights)[0]

    # optimization history of the adaptive stages
    adpt_history = \
        mvmm.opt_data_['adpt_opt_data']['adapt_pen_history']['opt_data']

    # initial BD weights come from the first adaptive stage
    bd_weights_init = adpt_history[0]['history']['init_params']['bd_weights']
    bd_weights_init_symlap_spec = eigh_Lsym_bp(bd_weights_init)[0]

    segment_histories = [segment['history'] for segment in adpt_history]

    if 'raw_eval_sum' in segment_histories[0]:
        n_steps = [len(hist['raw_eval_sum']) for hist in segment_histories]
        n_steps_cumsum = np.cumsum(n_steps)
        raw_eval_sum = np.concatenate(
            [hist['raw_eval_sum'] for hist in segment_histories])
    else:
        n_steps = None
        n_steps_cumsum = None
        raw_eval_sum = None

    obs_nll = np.concatenate([hist['obs_nll'] for hist in segment_histories])

    ft_opt_data = mvmm.opt_data_['ft_opt_data']
    if ft_opt_data is None:
        fine_tune_obs_nll = None
    else:
        fine_tune_obs_nll = ft_opt_data['history']['obs_nll']

    # --- initial BD weights ---
    plt.figure(figsize=(inches, inches))
    plot_Pi(bd_weights_init)
    plt.title('BD weights initial value')
    _maybe_save('BD_weights_init.png')

    # --- estimated BD weights: raw (left) and masked (right) ---
    plt.figure(figsize=(2 * inches, inches))

    plt.subplot(1, 2, 1)
    plot_Pi(bd_weights)
    plt.title('BD weights estimate, n_blocks={}'.format(
        summary['n_communities']))

    plt.subplot(1, 2, 2)
    plot_Pi(bd_weights, mask=Pi_comm < zero_thresh)
    plt.title('BD weights estimate, block diagonal perm')

    _maybe_save('BD_weights_est.png')

    # --- spectrum of BD weights ---
    plt.figure(figsize=(inches, inches))
    idxs = np.arange(1, len(bd_weights_symlap_spec) + 1)
    plt.plot(idxs, bd_weights_symlap_spec, marker='.', label='Estimate')
    plt.plot(idxs, bd_weights_init_symlap_spec, marker='.', label="Initial")
    plt.title('BD weights estimate spectrum')
    plt.ylim(0)
    plt.legend()
    _maybe_save('BD_weights_spectrum.png')

    # --- number of steps for each stage ---
    if n_steps is not None:
        plt.figure(figsize=(inches, inches))
        idxs = np.arange(1, len(n_steps) + 1)
        plt.plot(idxs, n_steps, marker='.')
        plt.ylim(0)
        plt.ylabel("Number of steps")
        plt.xlabel("Adaptive stage")
        _maybe_save('n_steps.png')

    # --- obs NLL for entire path ---
    plt.figure(figsize=(inches, inches))
    plot_loss_history(obs_nll, loss_name="Obs NLL (entire path)")
    _maybe_save('path_obs_nll.png')

    # --- evals for entire path ---
    if raw_eval_sum is not None:
        plt.figure(figsize=(inches, inches))
        plt.plot(np.log10(raw_eval_sum), marker='.')
        plt.ylabel('log10(sum smallest evals)')
        plt.xlabel('step')
        plt.title('Eigenvalue history (entire path)')
        # grey line at the last step of every adaptive stage
        for stage_end in n_steps_cumsum:
            plt.axvline(stage_end - 1, color='grey')
        _maybe_save('path_evals.png')

    # --- losses for each segment ---
    if saving:
        segment_dir = join(save_dir, 'segments')
        os.makedirs(segment_dir, exist_ok=True)

    for seg_num, hist in enumerate(segment_histories, start=1):
        plot_loss_history(hist['loss_val'],
                          'loss val, adapt segment {}'.format(seg_num))
        if saving:
            save_fig(join(segment_dir,
                          'loss_history_{}.png'.format(seg_num)))

    # --- fine tune loss history (saved alongside the segments) ---
    if fine_tune_obs_nll is not None:
        plot_loss_history(fine_tune_obs_nll, 'fine tune obs NLL')
        if saving:
            save_fig(join(segment_dir, 'fine_tune_loss_history.png'))
Пример #10
0
# additional results #
######################

# number of communities reported by community_summary for the true Pi
n_blocks_true = community_summary(Pi_true)[0]['n_communities']
# number of strictly positive entries of the true Pi
n_comp_true = (Pi_true > 0).sum()

###########
# Pi true #
###########

# heatmap of the true Pi matrix, color scale starting at 0
plt.figure()
sns.heatmap(Pi_true, cmap='Blues', square=True, cbar=True, vmin=0)
plt.xlabel('View 1 clusters')
plt.ylabel('View 2 clusters')
plt.title("Pi True")
save_fig(os.path.join(save_dir, 'Pi_true.png'))

# skipped entirely when the block diagonal model was excluded
if 'bd_mvmm' not in to_exclude:

    #########################
    # Block diagonal Pi Viz #
    #########################
    bd_mvmm = models['bd_mvmm'].final_

    print('\n\n\n BD summary\n\n\n')
    # summarize the estimated BD weights using the true number of blocks;
    # presumably summarize_bd also draws the figure that is titled and saved
    # below -- TODO confirm
    summarize_bd(D=bd_mvmm.bd_weights_,
                 n_blocks=n_blocks_true,
                 zero_thresh=zero_thresh)
    plt.title('BD estimate')
    save_fig(os.path.join(save_dir, 'Pi_est_bd_mvmm.png'))
Пример #11
0
    'fit_time': None,
    'n_comp_est': None,
    'bic': None,
    'aic': None,
    'silhouette': None,
    'calinski_harabasz': None,
    'davies_bouldin': None,
    'dunn': None
}

###########################
# classification accuracy #
###########################
plt.figure(figsize=[inches, inches])
plot_clf_results(results)
# NOTE(review): no file extension is given here, unlike the other save_fig
# calls -- confirm save_fig handles this as intended
save_fig(join(results_save_dir, 'clf_acc'))

###########
# Pi True #
###########
Pi_true = pi_true_summary['Pi']
# True where the true Pi is exactly zero
Pi_true_mask = Pi_true == 0

# non-paper version of Pi_true
# (the matrix is transposed for display; no color bar)
plt.figure(figsize=[inches, inches])
sns.heatmap(Pi_true.T, cmap='Blues', square=True, cbar=False, vmin=0)
plt.xlabel('View 1 clusters')
plt.ylabel('View 2 clusters')
# three-line title: name, number of blocks, block shapes
title = 'Pi True\nNumber of blocks={}\nBlock shapes: {}'.\
    format(pi_true_summary['summary']['n_communities'],
           pi_true_summary['summary']['comm_shapes'])
Пример #12
0
        pd.DataFrame(out[k]).to_csv(join(save_dir, '{}.csv'.format(k)))

    # optional entry: dumped to disk only when present in `out`
    if 'comparison' in out.keys():
        dump(out['comparison'], join(save_dir, 'variable_comparison'))

    # optional entry: when 'cl_super_means' is present, the standardized
    # version 'stand_cl_super_means' is read and saved alongside it
    if 'cl_super_means' in out.keys():
        out['cl_super_means'].to_csv(join(save_dir, 'cl_super_means.csv'))
        out['stand_cl_super_means'].\
            to_csv(join(save_dir, 'stand_cl_super_means.csv'))

    ############
    # Plot PCA #
    ############
    plt.figure(figsize=(inches, inches))
    plot_gmm_pcs(gmm=model, X=X.values)
    save_fig(join(save_dir, 'pca_projections.png'))

    ######################
    # Plot cluster means #
    ######################
    # output directories: <save_dir>/cl_means and <save_dir>/cl_means/scatter
    mean_save_dir = make_and_get_dir(save_dir, 'cl_means')
    scatter_save_dir = make_and_get_dir(mean_save_dir, 'scatter')

    # use the standardized super means when available, otherwise fall back
    # to the standardized cluster means
    if 'stand_cl_super_means' in out.keys():
        stand_cl_means = out['stand_cl_super_means']
    else:
        stand_cl_means = out['stand_cl_means']

    cluster_labels = out['info']['cluster_labels']

    for cl_idx in range(stand_cl_means.shape[0]):
Пример #13
0
        est_n_comp = estimator.n_components
        res_writer.write("Cat data GMM estimated number of components: {}".
                         format(est_n_comp))

        # _model_sel_dir = make_and_get_dir(model_sel_dir, 'cat_gmm')
        # plt.figure(figsize=(inches, inches))
        plot_model_selection(cat_model, save_dir=model_sel_dir,
                             name_stub='cat_gmm',
                             title='GMM on concatenated data',
                             inches=inches)

        # optimization history
        loss_history = estimator.opt_data_['history']['obs_nll']
        plot_loss_history(loss_history,
                          loss_name='Observed data negative log-likelihood')
        save_fig(os.path.join(opt_diag_dir, 'cat_best_model_opt_history.png'))

    else:
        print("cat isnt fit...")


########################
# view marginal models #
########################

n_views = len(view_models)
for v in range(n_views):

    # keep the best estimator of each view under the key 'view_<v>'
    # (v is 0-based)
    estimator = view_models[v].best_estimator_
    sel_models['view_' + str(v)] = estimator