def plot_GAM(gams, X, Y, size=4, dpi=300, ext='png', filename=None):
    """Draw every GAM term on a grid: one row per entry of `gams`, one column per term.

    Args:
        gams: mapping of target name -> dict holding a fitted GAM under
            'model' and its cross-validation scores under 'scores_cv'.
        X: predictor DataFrame; its number of columns sets the number of
            subplot columns and its column names are used as titles.
        Y: target DataFrame; its number of columns sets the number of
            subplot rows and the number of palette colors.
        size: scale factor for the per-panel figure size and the font sizes.
        dpi: resolution forwarded to save_figure.
        ext: file extension appended to `filename`.
        filename: output path without extension. When given, the figure is
            saved to '<filename>.<ext>' and closed; when None the figure is
            left open.
    """
    cols = X.shape[1]
    rows = Y.shape[1]
    colors = sns.color_palette(n_colors=rows)
    # Size the figure directly instead of overwriting plt.rcParams, which
    # would silently resize every figure created afterwards. squeeze=False
    # keeps the axes array 2-D so a single row or column can still be
    # indexed and iterated below.
    fig, mat_axs = plt.subplots(rows, cols,
                                figsize=(cols*size, rows*size),
                                squeeze=False)
    titles = X.columns
    for j, (name, out) in enumerate(gams.items()):
        axs = mat_axs[j]
        gam = out['model']
        R2 = get_avg_score(out['scores_cv'])
        p_vals = gam.statistics_['p_values']
        for i, ax in enumerate(axs):
            plot_term(gam, i, ax, colors[j], size=size)
            ax.set_xlabel('')
            ax.text(.5, .95, 'p< %s' % format_num(p_vals[i]), va='center',
                    fontsize=size*3, transform=ax.transAxes)
            # NOTE(review): titles are drawn on every even-numbered row, not
            # only the first one -- confirm this is intended.
            if j%2==0:
                ax.set_title(titles[i], fontsize=size*4)
            if i==0:
                # the row label carries the target name and its averaged CV score
                ax.set_ylabel(name + ' (%s)' % format_num(R2),
                              fontsize=size*4)
            else:
                ax.set_ylabel('')

    fig.subplots_adjust(hspace=.4)
    if filename is not None:
        save_figure(fig, '%s.%s' % (filename,ext),
                    {'bbox_inches': 'tight', 'dpi': dpi})
        # close this figure explicitly rather than whichever one is current
        plt.close(fig)
def calc_EFA_retest(results, rotate='oblimin', verbose=True):
    """Compare EFA factor scores of the original sample with its retest sample.

    The retest dataset name is derived by replacing 'Complete' with 'Retest'
    in `results.dataset`. Only rows whose index label appears in both the
    retest data and `results.data` are used.

    Args:
        results: results object providing `ID`, `dataset`, `data` and `EFA`.
        rotate: rotation name forwarded to the EFA helpers; factors are
            reordered only when it is 'oblimin'.
        verbose: when True, print the mean and per-factor correlations.

    Returns:
        tuple (combined, cross_diag, ICCs):
            combined: reference scores and retest scores concatenated
                column-wise (retest columns carry a ' Retest' suffix).
            cross_diag: diagonal of the lower-left block of `combined.corr()`,
                i.e. the correlation of each retest column with the reference
                column at the same position.
            ICCs: one value per reference column taken from psych.ICC
                (last entry of `out[0][1]`; presumably one specific ICC
                variant -- confirm against the psych output layout).
    """
    name = results.ID.split('_')[0].title()
    retest_data_raw = get_behav_data(
        dataset=results.dataset.replace('Complete','Retest'),
        file='meaningful_variables.csv')
    # Select the shared rows with a boolean mask: pandas no longer accepts a
    # set as a .loc indexer, and a set would also yield an arbitrary row order.
    in_both = retest_data_raw.index.isin(results.data.index)
    retest_data_raw = retest_data_raw.loc[in_both, :]
    retest_scores = transfer_scores(retest_data_raw, results, rotate=rotate)
    retest_scores.columns = [str(i)+' Retest' for i in retest_scores.columns]
    # scale and perform the factor score transformation
    EFA = results.EFA
    c = EFA.get_c()  # presumably the number of factors; used as the block size below
    ref_scores = EFA.get_scores(c=c, rotate=rotate).loc[retest_data_raw.index, :]

    # reorder scores
    if rotate == 'oblimin':
        reorder_vec = EFA.get_factor_reorder(c, rotate=rotate)
        ref_scores = ref_scores.iloc[:, reorder_vec]
        retest_scores = retest_scores.iloc[:, reorder_vec]
    combined = pd.concat([ref_scores, retest_scores], axis=1)
    cross_diag = np.diag(combined.corr().iloc[c:, :c])
    # get ICCs
    ICCs = []
    for col in ref_scores.columns:
        # the column name is used as a regex, so it selects the reference
        # column together with its ' Retest' counterpart
        tmp = combined.filter(regex=str(col))
        out = psych.ICC(tmp)
        ICCs.append(list(out[0][1])[-1])
    if verbose:
        print('%s, Avg Correlation: %s\n' % (name, format_num(np.mean(cross_diag))))
        for factor, num in zip(ref_scores.columns, cross_diag):
            print('%s: %s' % (factor, format_num(num)))
    return combined, cross_diag, ICCs
def plot_EFA_retest(combined, size=4.6, dpi=300, 
                    ext='png', plot_dir=None):
    """Show the correlation matrix of `combined` as a square heatmap.

    Args:
        combined: DataFrame whose column correlations are plotted.
        size: figure side length; font sizes and line widths scale with it.
        dpi: resolution forwarded to save_figure.
        ext: file extension of the saved image.
        plot_dir: directory to save 'EFA_test_retest_heatmap.<ext>' into.
            When None nothing is saved and the figure stays open.
    """
    corr_mat = combined.corr()
    peak = corr_mat.abs().max().max()
    n_vars = corr_mat.shape[1]
    palette = sns.diverging_palette(220, 15, n=100, as_cmap=True)

    fig = plt.figure(figsize=(size, size))
    heat_ax = fig.add_axes([.1, .1, .8, .8])
    cbar_ax = fig.add_axes([.92, .15, .04, .7])
    sns.heatmap(corr_mat,
                square=True,
                ax=heat_ax,
                cbar_ax=cbar_ax,
                vmin=-1,
                vmax=1,
                cmap=palette,
                cbar_kws=dict(orientation='vertical', ticks=[-1, 0, 1]))
    heat_ax.set_xticklabels(heat_ax.get_xticklabels(), rotation=90)
    heat_ax.set_yticklabels(heat_ax.get_yticklabels(), rotation=0)
    heat_ax.tick_params(labelsize=size/len(corr_mat)*40)

    # colorbar: relabel the three ticks and add the axis caption
    cbar_labels = [format_num(-peak), 0, format_num(peak)]
    cbar_ax.set_yticklabels(cbar_labels)
    cbar_ax.tick_params(labelsize=size, length=0, pad=size/2)
    cbar_ax.set_ylabel('Factor Loading',
                       rotation=-90,
                       fontsize=size,
                       labelpad=size/2)

    # black divider lines through the middle of the matrix
    for draw_line in (heat_ax.axvline, heat_ax.axhline):
        draw_line(n_vars//2, 0, n_vars, color='k', linewidth=size/3)

    if plot_dir is None:
        return
    target = path.join(plot_dir, 'EFA_test_retest_heatmap.%s' % ext)
    save_figure(fig, target, {'bbox_inches': 'tight', 'dpi': dpi})
    plt.close()
def calc_EFA_retest(results, rotate='oblimin', verbose=True):
    """Correlate original-sample EFA factor scores with their retest counterparts.

    The retest dataset name is obtained by swapping 'Complete' for 'Retest'
    in `results.dataset`; only rows present in both the retest data and
    `results.data` are analysed.

    Args:
        results: results object providing `ID`, `dataset`, `data` and `EFA`.
        rotate: rotation name passed to the EFA helpers; the factor
            reordering step only runs for 'oblimin'.
        verbose: print the average and the per-factor correlations.

    Returns:
        tuple (combined, cross_diag, ICCs):
            combined: reference and retest scores side by side (retest
                columns are suffixed with ' Retest').
            cross_diag: diagonal of `combined.corr().iloc[c:, :c]`, pairing
                each retest column with the reference column at the same
                position.
            ICCs: list with one psych.ICC-derived value per reference column
                (last entry of `out[0][1]`; which ICC variant that is should
                be confirmed against the psych output layout).
    """
    name = results.ID.split('_')[0].title()
    retest_data_raw = get_behav_data(dataset=results.dataset.replace(
        'Complete', 'Retest'),
                                     file='meaningful_variables.csv')
    # A boolean mask replaces the former set indexer: pandas rejects sets in
    # .loc, and the mask keeps the retest rows in their original order.
    in_both = retest_data_raw.index.isin(results.data.index)
    retest_data_raw = retest_data_raw.loc[in_both, :]
    retest_scores = transfer_scores(retest_data_raw, results, rotate=rotate)
    retest_scores.columns = [str(i) + ' Retest' for i in retest_scores.columns]
    # scale and perform the factor score transformation
    EFA = results.EFA
    c = EFA.get_c()  # presumably the number of factors; used as the block size below
    ref_scores = EFA.get_scores(c=c,
                                rotate=rotate).loc[retest_data_raw.index, :]

    # reorder scores
    if rotate == 'oblimin':
        reorder_vec = EFA.get_factor_reorder(c, rotate=rotate)
        ref_scores = ref_scores.iloc[:, reorder_vec]
        retest_scores = retest_scores.iloc[:, reorder_vec]
    combined = pd.concat([ref_scores, retest_scores], axis=1)
    cross_diag = np.diag(combined.corr().iloc[c:, :c])
    # get ICCs
    ICCs = []
    for col in ref_scores.columns:
        # the column name doubles as a regex, matching the reference column
        # and its ' Retest' counterpart
        tmp = combined.filter(regex=str(col))
        out = psych.ICC(tmp)
        ICCs.append(list(out[0][1])[-1])
    if verbose:
        print('%s, Avg Correlation: %s\n' %
              (name, format_num(np.mean(cross_diag))))
        for factor, num in zip(ref_scores.columns, cross_diag):
            print('%s: %s' % (factor, format_num(num)))
    return combined, cross_diag, ICCs
def plot_prediction_scatter(results, target_order=None, EFA=True, change=False,
                            classifier='ridge', rotate='oblimin', 
                            normalize=False, metric='R2', size=4.6,  
                            dpi=300, ext='png', plot_dir=None):
    """Scatter target scores against model predictions, one panel per target.

    Args:
        results: results object providing `load_prediction_object`, `EFA`,
            `DA`, `data` and `dataset`.
        target_order, normalize, metric: accepted for interface
            compatibility; not used by this function.
        EFA: use EFA factor scores as predictors (True) or `results.data`.
        change: predict change scores obtained from the retest dataset
            instead of the plain target scores.
        classifier: prediction model name; used to load the prediction
            object and in the output file name.
        rotate: rotation forwarded to `load_prediction_object`.
        size: figure width; font and marker sizes scale with it.
        dpi: resolution forwarded to save_figure.
        ext: file extension of the saved image.
        plot_dir: directory to save into. When None the figure is left open.

    Returns:
        None. Prints a message and returns early when no prediction object
        is found.
    """
    predictions = results.load_prediction_object(EFA=EFA, 
                                                 change=change,
                                                 classifier=classifier,
                                                 rotate=rotate)
    if predictions is None:
        print('No prediction object found!')
        return
    predictions = predictions['data']
    predictors = results.EFA.get_scores() if EFA else results.data
    if change:
        target_factors, _ = results.DA.get_change(results.dataset.replace('Complete', 'Retest'))
        predictors = predictors.loc[target_factors.index]
    else:
        target_factors = results.DA.get_scores()
    
    sns.set_style('whitegrid')
    targets = list(target_factors.columns)
    n_targets = len(targets)
    n_cols = 2
    n_rows = math.ceil(n_targets/n_cols)
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(size, size/n_cols*n_rows))
    axes = fig.get_axes()
    for i, v in enumerate(targets):
        MAE = format_num(predictions[v]['scores_cv'][0]['MAE'])
        R2 = format_num(predictions[v]['scores_cv'][0]['R2'])
        axes[i].set_title('%s: R2: %s, MAE: %s' % (v, R2, MAE), 
            fontweight='bold', fontsize=size*1.5)
        clf = predictions[v]['clf']
        axes[i].scatter(target_factors[v], clf.predict(predictors), s=size*3)  
        axes[i].tick_params(length=0, labelsize=0)
        if i % n_cols == 0:
            axes[i].set_ylabel('Predicted Factor Score', fontsize=size*1.5)
    # x labels go on the last used panel of each column
    for ax in axes[max(n_targets - n_cols, 0):n_targets]:
        ax.set_xlabel('Target Factor Score', fontsize=size*1.5)
    
    # Hide the panels that received no data. Slicing from n_targets is empty
    # when the grid is exactly full; the former axes[-empty_plots:] became
    # axes[-0:] in that case and hid every panel.
    for ax in axes[n_targets:]:
        ax.set_visible(False)
    plt.subplots_adjust(hspace=.4, wspace=.3)
    
    if plot_dir is not None:
        changestr = '_change' if change else ''
        prefix = 'EFA' if EFA else 'IDM'
        filename = '%s%s_%s_prediction_scatter.%s' % (prefix, changestr, classifier, ext)
        save_figure(fig, path.join(plot_dir, filename), 
                    {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close(fig)
def plot_prediction_scatter(predictions,
                            predictors,
                            targets,
                            target_order=None,
                            metric='R2',
                            size=4.6,
                            dpi=300,
                            filename=None):
    """Scatter target scores against predictions in a four-column panel grid.

    Args:
        predictions: mapping of target name -> dict with a fitted model under
            'clf' and cross-validation scores under 'scores_cv' (the first
            entry supplies 'R2' and 'MAE' for the panel title).
        predictors: DataFrame of predictors; restricted to `targets.index`.
        targets: DataFrame with one column per target.
        target_order: names of the targets to plot, in plotting order.
            Defaults to every key of `predictions`.
        metric: accepted for interface compatibility; not used here.
        size: figure width; font and marker sizes scale with it.
        dpi: resolution forwarded to save_figure.
        filename: output path. When given, the figure is saved and closed;
            when None it is left open.
    """
    # subset predictors
    predictors = predictors.loc[targets.index]
    if target_order is None:
        target_order = predictions.keys()
    target_order = list(target_order)
    n_targets = len(target_order)

    sns.set_style('white')
    n_cols = 4
    n_rows = math.ceil(n_targets / n_cols)
    fig, axes = plt.subplots(n_rows,
                             n_cols,
                             figsize=(size, size / n_cols * n_rows))
    axes = fig.get_axes()
    for i, v in enumerate(target_order):
        MAE = format_num(predictions[v]['scores_cv'][0]['MAE'])
        R2 = format_num(predictions[v]['scores_cv'][0]['R2'])
        # one word of the target name per title line
        axes[i].set_title('%s\nR2: %s, MAE: %s' %
                          ('\n'.join(v.split()), R2, MAE),
                          fontweight='bold',
                          fontsize=size * 1)
        clf = predictions[v]['clf']
        axes[i].scatter(targets[v],
                        clf.predict(predictors),
                        s=size * 2.5,
                        edgecolor='white',
                        linewidth=size / 30)
        axes[i].tick_params(length=0, labelsize=0)
        # add diagonal; limits are captured first and restored afterwards so
        # that drawing the line does not rescale the panel
        xlim = axes[i].get_xlim()
        ylim = axes[i].get_ylim()
        axes[i].plot(xlim, ylim, ls="-", c=".5", zorder=-1)
        axes[i].set_xlim(xlim)
        axes[i].set_ylim(ylim)
        for spine in ['top', 'right']:
            axes[i].spines[spine].set_visible(False)
        if i % n_cols == 0:
            axes[i].set_ylabel('Predicted Score', fontsize=size * 1.2)
    # NOTE(review): this slice covers nearly every panel, not just the bottom
    # row -- confirm which panels are meant to carry the x label.
    for ax in axes[-(n_targets + 1):]:
        ax.set_xlabel('Target Score', fontsize=size * 1.2)

    # Hide unused panels. The count is derived from the targets actually
    # plotted (the grid was sized from target_order); using targets.columns
    # gave the wrong count whenever target_order was a subset.
    for ax in axes[n_targets:]:
        ax.set_visible(False)
    plt.subplots_adjust(hspace=.6, wspace=.3)
    if filename is not None:
        save_figure(fig, filename, {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close(fig)
def plot_predictors_comparison(R2_df, size=2, dpi=300, filename=None):
    """Pair-plot the cross-validated scores of different predictor sets.

    Rows of `R2_df` whose index label contains 'CV' are kept. Each panel
    below the diagonal is annotated with the Spearman correlation of the two
    plotted columns; panels above the diagonal are hidden.

    Args:
        R2_df: DataFrame of scores with a 'Target_Cat' column used as hue.
        size: height of each pair-plot panel; the annotation font size
            scales with it.
        dpi: resolution forwarded to save_figure.
        filename: output path. When given, the figure is saved and nothing
            is returned.

    Returns:
        The seaborn grid when `filename` is None, otherwise None.
    """
    CV_df = R2_df.filter(regex='CV', axis=0)
    # Correlate the numeric columns only: the hue column is not numeric and
    # recent pandas versions no longer drop such columns silently.
    CV_corr = CV_df.select_dtypes(include='number').corr(method='spearman')

    max_R2 = round(CV_df.max(numeric_only=True).max(), 1)
    # `size` is honoured as passed; it used to be overwritten with 2 here.
    grid = sns.pairplot(CV_df, hue='Target_Cat', height=size)
    for i, row in enumerate(grid.axes):
        for j, ax in enumerate(row):
            ax.set_xlim([0, max_R2])
            ax.set_ylim([0, max_R2])
            # draw the dotted diagonal, then restore the limits in case the
            # plot call rescaled the axes
            xlim = ax.get_xlim()
            ylim = ax.get_ylim()
            ax.plot(xlim, ylim, ls=":", c=".5", zorder=-1)
            ax.set_xlim(xlim)
            ax.set_ylim(ylim)
            if j < i:
                ax.text(.5,
                        1,
                        r'$\rho$ = %s' % format_num(CV_corr.iloc[i, j]),
                        ha='center',
                        fontsize=size * 7,
                        fontweight='bold',
                        transform=ax.transAxes)
            if j > i:
                ax.set_visible(False)
    if filename is not None:
        save_figure(grid.fig, filename, {'bbox_inches': 'tight', 'dpi': dpi})
    else:
        return grid
def plot_EFA_retest(combined, size=4.6, dpi=300, ext='png', plot_dir=None):
    """Plot the correlation matrix of `combined` as a square heatmap.

    Args:
        combined: DataFrame whose column correlations are plotted.
        size: figure side length; font sizes and line widths scale with it.
        dpi: resolution forwarded to save_figure.
        ext: file extension of the saved image.
        plot_dir: directory to save 'EFA_test_retest_heatmap.<ext>' into.
            When None nothing is saved and the figure is left open.
    """
    corr = combined.corr()
    max_val = abs(corr).max().max()

    fig = plt.figure(figsize=(size, size))
    ax = fig.add_axes([.1, .1, .8, .8])
    cbar_ax = fig.add_axes([.92, .15, .04, .7])
    sns.heatmap(corr,
                square=True,
                ax=ax,
                cbar_ax=cbar_ax,
                vmin=-1,
                vmax=1,
                cmap=sns.diverging_palette(220, 15, n=100, as_cmap=True),
                cbar_kws={
                    'orientation': 'vertical',
                    'ticks': [-1, 0, 1]
                })
    # re-apply the generated tick labels with the desired rotation
    # (leftover debug prints of the labels were removed)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0)
    ax.tick_params(labelsize=size / len(corr) * 40)

    # format cbar axis
    cbar_ax.set_yticklabels([format_num(-max_val), 0, format_num(max_val)])
    cbar_ax.tick_params(labelsize=size, length=0, pad=size / 2)
    cbar_ax.set_ylabel('Factor Loading',
                       rotation=-90,
                       fontsize=size,
                       labelpad=size / 2)

    # set divider lines through the middle of the matrix; the extent
    # arguments of axvline/axhline are axes fractions, so the defaults
    # (0 to 1) already span the whole heatmap
    n = corr.shape[1]
    ax.axvline(n // 2, color='k', linewidth=size / 3)
    ax.axhline(n // 2, color='k', linewidth=size / 3)

    if plot_dir is not None:
        save_figure(fig, path.join(plot_dir,
                                   'EFA_test_retest_heatmap.%s' % ext), {
                                       'bbox_inches': 'tight',
                                       'dpi': dpi
                                   })
        # close this figure explicitly rather than whichever one is current
        plt.close(fig)
Exemplo n.º 9
0
def plot_vars(tasks, contrasts, axes=None, xlabel='Value', standardize=False):
    """Strip-plot the variables of each task on that task's axis.

    Args:
        tasks: task names; columns of `contrasts` starting with a task name
            belong to that task.
        contrasts: DataFrame whose column names have the form
            '<task>.<variable>'.
        axes: indexable collection of matplotlib axes, one per task.
        xlabel: x-axis label of each plotted panel.
        standardize: divide every column by its standard deviation first.
    """
    palette = sns.hls_palette(4)
    muted_palette = [sns.desaturate(shade, .5) for shade in palette]

    def _draw_task(ax, task_df):
        # Plot one task's variables as a strip plot with their means on top.
        if standardize:
            task_df = task_df / task_df.std()
        # keep the part after the task prefix, then prettify it
        task_df.columns = [col.split('.')[1] for col in task_df.columns]
        task_df.columns = format_variable_names(task_df.columns)
        # add mean value to columns
        averages = task_df.mean()
        n_vars = len(averages)
        task_df.columns = [
            task_df.columns[pos] + ': %s' % format_num(averages.iloc[pos])
            for pos in range(n_vars)
        ]
        long_df = task_df.melt(var_name='Variable', value_name='Value')

        sns.stripplot(x='Value',
                      y='Variable',
                      hue='Variable',
                      ax=ax,
                      data=long_df,
                      palette=muted_palette,
                      jitter=True,
                      alpha=.75)
        # plot central tendency
        ax.scatter(averages,
                   range(n_vars),
                   s=200,
                   c=palette[:n_vars],
                   edgecolors='white',
                   linewidths=2,
                   zorder=3)

        # add legend
        legend = ax.get_legend()
        legend.set_title('')
        beautify_legend(legend, colors=palette, fontsize=14)
        # symmetric x range around zero
        bound = long_df['Value'].abs().max()
        ax.set_xlim(-bound, bound)
        ax.set_xlabel(xlabel, fontsize=16)
        ax.set_ylabel('')
        ax.set_yticklabels('')

    for idx, task in enumerate(tasks):
        task_df = contrasts.filter(regex='^' + task)
        if task_df.shape[1] != 0:
            _draw_task(axes[idx], task_df)
        # the title is set even when the task has no matching columns
        axes[idx].set_title(format_variable_names([task])[0].title(),
                            fontsize=20)
    plt.subplots_adjust(hspace=.3)
Exemplo n.º 10
0
def plot_GAM(gams, X, Y, size=4, dpi=300, ext='png', filename=None):
    """Plot all GAM terms in a grid with one row per target and one column per term.

    Args:
        gams: mapping of target name -> dict holding a fitted GAM under
            'model' and its cross-validation scores under 'scores_cv'.
        X: predictor DataFrame; its width sets the number of subplot columns
            and its column names are used as titles.
        Y: target DataFrame; its width sets the number of subplot rows and
            the number of palette colors.
        size: scale factor for the per-panel figure size and the font sizes.
        dpi: resolution forwarded to save_figure.
        ext: file extension appended to `filename`.
        filename: output path without extension. When given, the figure is
            saved to '<filename>.<ext>' and closed; when None it stays open.
    """
    cols = X.shape[1]
    rows = Y.shape[1]
    colors = sns.color_palette(n_colors=rows)
    # Pass the figure size to subplots instead of mutating plt.rcParams
    # (which would affect every later figure), and keep the axes array 2-D
    # with squeeze=False so single-row or single-column grids still work.
    fig, mat_axs = plt.subplots(rows,
                                cols,
                                figsize=(cols * size, rows * size),
                                squeeze=False)
    titles = X.columns
    for j, (name, out) in enumerate(gams.items()):
        axs = mat_axs[j]
        gam = out['model']
        R2 = get_avg_score(out['scores_cv'])
        p_vals = gam.statistics_['p_values']
        for i, ax in enumerate(axs):
            plot_term(gam, i, ax, colors[j], size=size)
            ax.set_xlabel('')
            ax.text(.5,
                    .95,
                    'p< %s' % format_num(p_vals[i]),
                    va='center',
                    fontsize=size * 3,
                    transform=ax.transAxes)
            # NOTE(review): titles are drawn on every even-numbered row, not
            # only the first one -- confirm this is intended.
            if j % 2 == 0:
                ax.set_title(titles[i], fontsize=size * 4)
            if i == 0:
                # the row label carries the target name and its averaged CV score
                ax.set_ylabel(name + ' (%s)' % format_num(R2),
                              fontsize=size * 4)
            else:
                ax.set_ylabel('')

    fig.subplots_adjust(hspace=.4)
    if filename is not None:
        save_figure(fig, '%s.%s' % (filename, ext), {
            'bbox_inches': 'tight',
            'dpi': dpi
        })
        # close this figure explicitly rather than whichever one is current
        plt.close(fig)
# ********************************************************
# Inspect
# ********************************************************
# Print, for every target of the 'task' GAMs, the GAM scores next to the
# ridge-regression R2 (cross-validated and in-sample), then save the full
# GAM term matrices for the 'task' and 'survey' results.
# NOTE(review): GAM_results, results and Y are expected to exist already;
# none of them is defined in this part of the file.
gams = GAM_results['task']
X = results['task'].EFA.get_scores()
ridge_prediction = results['task'].load_prediction_object(classifier='ridge')['data']

for k,v in gams.items():
    # first entry of each score list holds the R2 value
    ridge_r2cv = ridge_prediction[k]['scores_cv'][0]['R2']
    ridge_r2in = ridge_prediction[k]['scores_insample'][0]['R2']
    print('*'*79)
    print(k)
    print('GAM CV', get_avg_score(v['scores_cv']))
    print('GAM Insample', get_avg_score(v['scores_insample']))
    print('*')
    print('Ridge CV', format_num(ridge_r2cv, 3))
    print('Ridge insample', format_num(ridge_r2in, 3))
    print('*'*79)
    

# plot full matrix
# both figures are written next to the parent of the task plot directory
plot_dir = path.dirname(results['task'].get_plot_dir())
plot_GAM(GAM_results['task'], 
         results['task'].EFA.get_scores(), 
         Y, 
         filename=path.join(plot_dir, 'task_GAM'))

plot_GAM(GAM_results['survey'], 
         results['survey'].EFA.get_scores(), 
         Y, 
         filename=path.join(plot_dir, 'survey_GAM'))
def plot_prediction_scatter(results,
                            target_order=None,
                            EFA=True,
                            change=False,
                            classifier='ridge',
                            rotate='oblimin',
                            normalize=False,
                            metric='R2',
                            size=4.6,
                            dpi=300,
                            ext='png',
                            plot_dir=None):
    """Plot predicted against actual target scores, one panel per target.

    Args:
        results: results object providing `load_prediction_object`, `EFA`,
            `DA`, `data` and `dataset`.
        target_order, normalize, metric: accepted for interface
            compatibility; not used by this function.
        EFA: use EFA factor scores as predictors (True) or `results.data`.
        change: predict change scores obtained from the retest dataset
            instead of the plain target scores.
        classifier: prediction model name; used to load the prediction
            object and in the output file name.
        rotate: rotation forwarded to `load_prediction_object`.
        size: figure width; font and marker sizes scale with it.
        dpi: resolution forwarded to save_figure.
        ext: file extension of the saved image.
        plot_dir: directory to save into. When None the figure is left open.

    Returns:
        None. Prints a message and returns early when no prediction object
        is found.
    """
    predictions = results.load_prediction_object(EFA=EFA,
                                                 change=change,
                                                 classifier=classifier,
                                                 rotate=rotate)
    if predictions is None:
        print('No prediction object found!')
        return
    predictions = predictions['data']
    predictors = results.EFA.get_scores() if EFA else results.data
    if change:
        target_factors, _ = results.DA.get_change(
            results.dataset.replace('Complete', 'Retest'))
        predictors = predictors.loc[target_factors.index]
    else:
        target_factors = results.DA.get_scores()

    sns.set_style('whitegrid')
    targets = list(target_factors.columns)
    n_targets = len(targets)
    n_cols = 2
    n_rows = math.ceil(n_targets / n_cols)
    fig, axes = plt.subplots(n_rows,
                             n_cols,
                             figsize=(size, size / n_cols * n_rows))
    axes = fig.get_axes()
    for i, v in enumerate(targets):
        MAE = format_num(predictions[v]['scores_cv'][0]['MAE'])
        R2 = format_num(predictions[v]['scores_cv'][0]['R2'])
        axes[i].set_title('%s: R2: %s, MAE: %s' % (v, R2, MAE),
                          fontweight='bold',
                          fontsize=size * 1.5)
        clf = predictions[v]['clf']
        axes[i].scatter(target_factors[v], clf.predict(predictors), s=size * 3)
        axes[i].tick_params(length=0, labelsize=0)
        if i % n_cols == 0:
            axes[i].set_ylabel('Predicted Factor Score', fontsize=size * 1.5)
    # x labels go on the last used panel of each column
    for ax in axes[max(n_targets - n_cols, 0):n_targets]:
        ax.set_xlabel('Target Factor Score', fontsize=size * 1.5)

    # Hide the panels without data. With a completely filled grid this slice
    # is empty, whereas the former axes[-empty_plots:] turned into axes[-0:]
    # and hid every panel.
    for ax in axes[n_targets:]:
        ax.set_visible(False)
    plt.subplots_adjust(hspace=.4, wspace=.3)

    if plot_dir is not None:
        changestr = '_change' if change else ''
        prefix = 'EFA' if EFA else 'IDM'
        filename = '%s%s_%s_prediction_scatter.%s' % (prefix, changestr,
                                                      classifier, ext)
        save_figure(fig, path.join(plot_dir, filename), {
            'bbox_inches': 'tight',
            'dpi': dpi
        })
        plt.close(fig)
Exemplo n.º 13
0
def plot_dendrogram(loading,
                    clustering,
                    title=None,
                    break_lines=True,
                    drop_list=None,
                    double_drop_list=None,
                    absolute_loading=False,
                    size=4.6,
                    dpi=300,
                    filename=None):
    """ Plots HCA results as dendrogram with loadings underneath
    
    Args:
        loading: pandas df, a results EFA loading matrix
        clustering: dict-like HCA clustering result; the keys read here are
            'linkage', 'clustered_df', 'labels', 'reorder_vec' and,
            optionally, 'cluster_names'
        title (optional): str, title to plot
        break_lines: whether to separate EFA heatmap based on clusters, default=True
        drop_list (optional): list of cluster indices whose cluster label is
            drawn lower, with a longer connector line
        double_drop_list (optional): list of cluster indices whose cluster
            label is drawn lower still
        absolute_loading: whether to plot the absolute loading value, default False
        size: figure width; the figure height is size * .6 and font sizes
            and line widths scale with it
        dpi: resolution passed to save_figure
        filename: if set, where to save the plot

    Returns:
        The figure when filename is None; otherwise the figure is saved and
        closed and nothing is returned.
        
    """

    # number of factors (columns of the loading matrix)
    c = loading.shape[1]
    # extract cluster vars
    link = clustering['linkage']
    DVs = clustering['clustered_df'].columns
    # rows follow the clustered variable order
    ordered_loading = loading.loc[DVs]
    if absolute_loading:
        ordered_loading = abs(ordered_loading)
    # get cluster sizes
    labels = clustering['labels']
    # labels are treated as 1-based cluster ids
    cluster_sizes = [np.sum(labels == (i + 1)) for i in range(max(labels))]
    link_function, colors = get_dendrogram_color_fun(link,
                                                     clustering['reorder_vec'],
                                                     labels)

    # set figure properties
    figsize = (size, size * .6)
    # set up axes' size
    heatmap_height = ordered_loading.shape[1] * .035
    # [bottom, height] of the heatmap in figure fractions
    heat_size = [.1, heatmap_height]
    # the dendrogram starts where the heatmap ends
    dendro_size = [np.sum(heat_size), .3]
    # set up plot axes
    # each list is [left, bottom, width, height]
    dendro_size = [.15, dendro_size[0], .78, dendro_size[1]]
    heatmap_size = [.15, heat_size[0], .78, heat_size[1]]
    cbar_size = [.935, heat_size[0], .015, heat_size[1]]
    # factors become rows, variables become columns
    ordered_loading = ordered_loading.T

    with sns.axes_style('white'):
        fig = plt.figure(figsize=figsize)
        ax1 = fig.add_axes(dendro_size)
        # **********************************
        # plot dendrogram
        # **********************************
        with plt.rc_context({'lines.linewidth': size * .125}):
            dendrogram(link,
                       ax=ax1,
                       link_color_func=link_function,
                       orientation='top')
        # change axis properties
        ax1.tick_params(axis='x',
                        which='major',
                        labelsize=14,
                        labelbottom=False)
        ax1.get_yaxis().set_visible(False)
        ax1.spines['top'].set_visible(False)
        ax1.spines['right'].set_visible(False)
        ax1.spines['bottom'].set_visible(False)
        ax1.spines['left'].set_visible(False)
        # **********************************
        # plot loadings as heatmap below
        # **********************************
        ax2 = fig.add_axes(heatmap_size)
        cbar_ax = fig.add_axes(cbar_size)
        # the color scale uses the original loading matrix
        max_val = np.max(abs(loading.values))
        # bring to closest .25
        max_val = ceil(max_val * 4) / 4
        sns.heatmap(ordered_loading,
                    ax=ax2,
                    cbar=True,
                    cbar_ax=cbar_ax,
                    yticklabels=True,
                    xticklabels=True,
                    vmax=max_val,
                    vmin=-max_val,
                    cbar_kws={
                        'orientation': 'vertical',
                        'ticks': [-max_val, 0, max_val]
                    },
                    cmap=sns.diverging_palette(220, 15, n=100, as_cmap=True))
        ax2.set_yticklabels(ax2.get_yticklabels(), rotation=0)
        ax2.tick_params(axis='y',
                        labelsize=size * heat_size[1] * 30 / c,
                        pad=size / 4,
                        length=0)
        # format cbar axis
        cbar_ax.set_yticklabels([format_num(-max_val), 0, format_num(max_val)])
        cbar_ax.tick_params(labelsize=size * heat_size[1] * 25 / c,
                            length=0,
                            pad=size / 2)
        cbar_ax.set_ylabel('Factor Loading',
                           rotation=-90,
                           fontsize=size * heat_size[1] * 30 / c,
                           labelpad=size * 2)

        # add lines to heatmap to distinguish clusters
        if break_lines == True:
            xlim = ax2.get_xlim()
            ylim = ax2.get_ylim()
            # width of one variable column in data coordinates
            step = xlim[1] / len(labels)
            cluster_breaks = [i * step for i in np.cumsum(cluster_sizes)]
            # the last break is the right edge of the heatmap, so it is skipped
            ax2.vlines(cluster_breaks[:-1],
                       ylim[0],
                       ylim[1],
                       linestyles='dashed',
                       linewidth=size * .1,
                       colors=[.5, .5, .5],
                       zorder=10)
        # **********************************
        # plot cluster names
        # **********************************
        # first column index and tick position of every cluster
        beginnings = np.hstack([[0], np.cumsum(cluster_sizes)[:-1]])
        centers = beginnings + np.array(cluster_sizes) // 2 + .5
        offset = .07
        if 'cluster_names' in clustering.keys():
            ax2.tick_params(axis='x',
                            reset=True,
                            top=False,
                            bottom=False,
                            width=size / 8,
                            length=0)
            names = [transform_name(i) for i in clustering['cluster_names']]
            ax2.set_xticks(centers)
            ax2.set_xticklabels(names,
                                rotation=0,
                                ha='center',
                                fontsize=heatmap_size[2] * size * 1)
            ticks = ax2.xaxis.get_ticklines()[::2]
            for i, label in enumerate(ax2.get_xticklabels()):
                # clusters with an empty name get no bracket and no label styling
                if label.get_text() != '':
                    # horizontal bar spanning the cluster, drawn outside the heatmap
                    ax2.hlines(c + offset,
                               beginnings[i] + .5,
                               beginnings[i] + cluster_sizes[i] - .5,
                               clip_on=False,
                               color=colors[i],
                               linewidth=size / 5)
                    label.set_color(colors[i])
                    ticks[i].set_color(colors[i])
                    # default label position and connector length
                    y_drop = .005
                    line_drop = .3
                    if drop_list and i in drop_list:
                        y_drop = .05
                        line_drop = 1.6
                    # a double drop overrides a single drop
                    if double_drop_list and i in double_drop_list:
                        y_drop = .1
                        line_drop = 2.9
                    label.set_y(-(y_drop / heatmap_height +
                                  heatmap_height / c * offset))
                    # vertical connector from the bar towards the label
                    ax2.vlines(beginnings[i] + cluster_sizes[i] / 2,
                               c + offset,
                               c + offset + line_drop,
                               clip_on=False,
                               color=colors[i],
                               linewidth=size / 7.5)

        # add title
        if title:
            ax1.set_title(title, fontsize=size * 2, y=1.05)

    if filename is not None:
        save_figure(fig, filename, {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close()
    else:
        return fig
Exemplo n.º 14
0
def plot_subbranch(target_color,
                   cluster_i,
                   tree,
                   loading,
                   cluster_sizes,
                   title=None,
                   size=2.3,
                   dpi=300,
                   plot_loc=None):
    """Plot one cluster: its sub-dendrogram, loading heatmap and mean |loading| bars.

    Args:
        target_color: color of the branch to plot; converted with to_hex and
            compared against the entries of tree['color_list'].
        cluster_i: index of the cluster within `cluster_sizes`.
        tree: dict with a 'color_list' entry; also handed to plot_tree.
        loading: loading DataFrame; it is transposed here, and the columns
            of the transposed frame belonging to the cluster are plotted.
        cluster_sizes: number of variables in every cluster, in order.
        title (optional): title drawn above the dendrogram.
        size: scale factor for figure size, fonts and line widths.
        dpi: resolution passed to save_figure.
        plot_loc: if set, where to save the plot.

    Returns:
        The figure when plot_loc is None. None when the located color run
        does not match the expected cluster size, or after saving.
    """
    sns.set_style('white')
    colormap = sns.diverging_palette(220, 15, n=100, as_cmap=True)
    # get variables in subbranch based on coloring
    # scan for the run of entries colored target_color; start/end delimit it
    curr_color = tree['color_list'][0]
    start = 0
    for i, color in enumerate(tree['color_list']):
        if color != curr_color:
            end = i
            if curr_color == to_hex(target_color):
                break
            # "#808080" entries do not start a new run
            if color != "#808080":
                start = i
            curr_color = color

    # NOTE(review): `end` is only assigned inside the loop, so it is unbound
    # here when every entry of color_list has the same color -- confirm that
    # cannot happen.
    # bail out when the located run does not have the expected cluster size
    if (end - start) + 1 != cluster_sizes[cluster_i]:
        return

    # get subset of loading
    cumsizes = np.cumsum(cluster_sizes)
    if cluster_i == 0:
        loading_start = 0
    else:
        loading_start = cumsizes[cluster_i - 1]
    subset_loading = loading.T.iloc[:, loading_start:cumsizes[cluster_i]]

    # plotting
    N = subset_loading.shape[1]
    # axes width grows with the number of variables in the cluster
    length = N * .05
    # each list is [left, bottom, width, height]
    dendro_size = [0, .746, length, .12]
    heatmap_size = [0, .5, length, .25]
    fig = plt.figure(figsize=(size * 2, size * 4))
    dendro_ax = fig.add_axes(dendro_size)
    heatmap_ax = fig.add_axes(heatmap_size)
    cbar_size = [length + .22, .5, .05, .25]
    factor_avg_size = [length + .01, .5, .2, .25]
    factor_avg_ax = fig.add_axes(factor_avg_size)
    cbar_ax = fig.add_axes(cbar_size)
    #subset_loading.columns = [col.replace(': ',':\n', 1) for col in subset_loading.columns]
    plot_tree(tree, range(start, end), dendro_ax, linewidth=size / 2)
    dendro_ax.set_xticklabels('')

    # NOTE(review): this is the largest signed loading, not the largest
    # absolute loading -- confirm that is intended for a symmetric scale.
    max_val = np.max(loading.values)
    # if max_val is high, just make it 1
    if max_val > .9:
        max_val = 1
    sns.heatmap(
        subset_loading,
        ax=heatmap_ax,
        cbar=True,
        cbar_ax=cbar_ax,
        cbar_kws={'ticks': [-max_val, 0, max_val]},
        yticklabels=True,
        vmin=-max_val,
        vmax=max_val,
        cmap=colormap,
    )
    yn, xn = subset_loading.shape
    # label size shrinks once there are more than 8 rows
    tick_label_size = size * 30 / max(yn, 8)
    heatmap_ax.tick_params(labelsize=tick_label_size,
                           length=size * .5,
                           width=size / 5,
                           pad=size)
    heatmap_ax.set_yticklabels(heatmap_ax.get_yticklabels(), rotation=0)
    # number the columns 1..N; the names are listed in text_ax below
    heatmap_ax.set_xticks([i + .5 for i in range(0, subset_loading.shape[1])])
    heatmap_ax.set_xticklabels(
        [str(i) for i in range(1, subset_loading.shape[1] + 1)],
        size=size * 2,
        rotation=0,
        ha='center')

    # mean absolute loading of each row across the cluster's columns
    avg_factors = abs(subset_loading).mean(1)
    # format cbar axis
    cbar_ax.set_yticklabels([format_num(-max_val), 0, format_num(max_val)])
    cbar_ax.tick_params(axis='y', length=0)
    cbar_ax.tick_params(labelsize=size * 3)
    cbar_ax.set_ylabel('Factor Loading',
                       rotation=-90,
                       fontsize=size * 3,
                       labelpad=size * 2)
    # list the numbered column names as text in a separate, frameless axes
    text_ax = fig.add_axes([-.22, .44 - .02 * N, .4, .02 * N])
    for spine in ['top', 'right', 'bottom', 'left']:
        text_ax.spines[spine].set_visible(False)
    for i, label in enumerate(subset_loading.columns):
        text_ax.text(0,
                     1 - i / N,
                     str(i + 1) + '.',
                     fontsize=size * 2.8,
                     ha='right')
        text_ax.text(.1, 1 - i / N, label, fontsize=size * 3)
    text_ax.tick_params(which='both',
                        labelbottom=False,
                        labelleft=False,
                        bottom=False,
                        left=False)
    # average factor bar
    # the series is reversed before plotting the horizontal bars
    avg_factors[::-1].plot(kind='barh',
                           ax=factor_avg_ax,
                           width=.7,
                           color=tree['color_list'][start])
    factor_avg_ax.set_xlim(0, max_val)
    #factor_avg_ax.set_xticks([max(avg_factors)])
    #factor_avg_ax.set_xticklabels([format_num(max(avg_factors))])
    factor_avg_ax.set_xticklabels('')
    factor_avg_ax.set_yticklabels('')
    factor_avg_ax.tick_params(length=0)
    factor_avg_ax.spines['top'].set_visible(False)
    factor_avg_ax.spines['bottom'].set_visible(False)
    factor_avg_ax.spines['left'].set_visible(False)
    factor_avg_ax.spines['right'].set_visible(False)

    # title and axes styling of dendrogram
    if title:
        dendro_ax.set_title(title,
                            fontsize=size * 3,
                            y=1.05,
                            fontweight='bold')
    dendro_ax.get_yaxis().set_visible(False)
    dendro_ax.spines['top'].set_visible(False)
    dendro_ax.spines['right'].set_visible(False)
    dendro_ax.spines['bottom'].set_visible(False)
    dendro_ax.spines['left'].set_visible(False)
    if plot_loc is not None:
        # a ValueError while saving is reported and swallowed; the figure is
        # closed either way
        try:
            # NOTE(review): the print below looks like leftover debugging output
            print('about to crash? - dpi: ' + str(dpi))
            save_figure(fig, plot_loc, {'bbox_inches': 'tight', 'dpi': dpi})
            plt.close()
        except ValueError:
            print('something when wrong with that plot')
            plt.close()
    else:
        return fig
Exemplo n.º 15
0
# ********************************************************
# Inspect
# ********************************************************
# Print GAM fit next to the stored ridge fit for every target in the task
# GAM results, then save the GAM plots for the task and survey results.
gams = GAM_results['task']
X = results['task'].EFA.get_scores()
# the 'data' entry of the stored ridge prediction object, indexed by target
ridge_prediction = results['task'].load_prediction_object(
    classifier='ridge')['data']

# k is the target key, v is the GAM output for that target
for k, v in gams.items():
    # R2 taken from the first element of each ridge score list
    ridge_r2cv = ridge_prediction[k]['scores_cv'][0]['R2']
    ridge_r2in = ridge_prediction[k]['scores_insample'][0]['R2']
    print('*' * 79)
    print(k)
    print('GAM CV', get_avg_score(v['scores_cv']))
    print('GAM Insample', get_avg_score(v['scores_insample']))
    print('*')
    print('Ridge CV', format_num(ridge_r2cv, 3))
    print('Ridge insample', format_num(ridge_r2in, 3))
    print('*' * 79)

# plot full matrix
# NOTE(review): Y is not defined in this snippet - presumably the target
# dataframe created earlier in the script; confirm before running in isolation
plot_dir = path.dirname(results['task'].get_plot_dir())
plot_GAM(GAM_results['task'],
         results['task'].EFA.get_scores(),
         Y,
         filename=path.join(plot_dir, 'task_GAM'))

# same plot for the survey results; it is written to the same plot_dir
plot_GAM(GAM_results['survey'],
         results['survey'].EFA.get_scores(),
         Y,
         filename=path.join(plot_dir, 'survey_GAM'))
Exemplo n.º 16
0
 # *************************************************************************
 max_val = round(abs(task_subset).max().max(),1)
 loading_data = task_subset.filter(regex=tasks[task_i], axis=0)
 # for visualization purposes remove "reflections" from loading matrix
 # by multiplying by -1
 reflects = [-1 if 'ReflogTr' in i else 1 for i in loading_data.index]
 loading_data = loading_data.multiply(reflects, axis=0)
 # plot loadings
 sns.heatmap(loading_data.iloc[::-1,:], ax=loading_axes[task_i], 
             yticklabels=False, xticklabels=False,
             linecolor='white', linewidth=basewidth,
             cbar_ax=cbar_ax, vmax =  max_val, vmin = -max_val,
             cbar_kws={'ticks': [-max_val, 0, max_val]},
             cmap=sns.diverging_palette(220,16,n=100, as_cmap=True))
 # format cbar
 cbar_ax.set_yticklabels([format_num(-max_val, 1), 0, format_num(max_val, 1)])
 cbar_ax.tick_params(axis='y', length=0)
 cbar_ax.tick_params(labelsize=basefont)
 for i in range(1,loading_data.shape[0]+1):
     #loading_axes[task_i].hlines(i, -.2, 6.1, color='white', linewidth=basewidth*3)
     loading_axes[task_i].add_patch(Rectangle([-.1,i-.2], 
                 width=loading_data.shape[1]+.2, height=.2, zorder=100,
                 facecolor='white', edgecolor='white', 
                 linewidth=basewidth, clip_on=False))
 # add boxes
 for i in range(len(tick_names)):
     box_color = tick_colors[len(tick_names)-(i+1)]
     box_pos = [-.15, i+.2]
     loading_axes[task_i].add_patch(Rectangle(box_pos, 
                 width=.15, height=.4, zorder=100,
                 facecolor=box_color, edgecolor=box_color, 
def plot_subbranch(target_color, cluster_i, tree, loading, cluster_sizes, title=None,
                   size=2.3, dpi=300, plot_loc=None):
    """ Plots one colored sub-branch of a dendrogram with its loadings below
    
    The figure holds the sub-branch dendrogram, a heatmap of the loadings of
    the variables in the cluster, a bar per factor showing its mean absolute
    loading, a colorbar and a numbered list of the variable names.
    
    Args:
        target_color: color of the sub-branch to plot. It is converted with
            to_hex and compared against the entries of tree['color_list']
        cluster_i: index of the cluster in cluster_sizes
        tree: dendrogram output; tree['color_list'] is read here and the
            whole object is passed on to plot_tree
        loading: pandas df of loadings. Rows are selected positionally using
            the cumulative cluster sizes
        cluster_sizes: sequence with the number of variables in each cluster
        title (optional): str, title placed above the dendrogram
        size: scalar that scales the figure, fonts and line widths
        dpi: the final dpi for the image
        plot_loc: if set, where to save the plot
        
    Returns:
        The figure if plot_loc is None. None if the figure was saved, or if
        no run of target_color matching the cluster size was found
    """
    sns.set_style('white')
    colormap = sns.diverging_palette(220,15,n=100,as_cmap=True)
    # get variables in subbranch based on coloring
    color_list = tree['color_list']
    # convert once instead of on every color change
    target_hex = to_hex(target_color)
    curr_color = color_list[0]
    start = 0
    # stays None when color_list never changes color; the original code left
    # `end` unbound in that case and crashed on the size check below
    end = None
    for i, color in enumerate(color_list):
        if color != curr_color:
            end = i
            if curr_color == target_hex:
                break
            # grey entries do not start a new run
            if color != "#808080":
                start = i
            curr_color = color
    
    # NOTE(review): when target_color is the final run in color_list the loop
    # finishes without a break and this check normally fails - confirm that
    # skipping the last cluster is intended
    if end is None or (end-start)+1 != cluster_sizes[cluster_i]:
        return
    
    # get subset of loading
    cumsizes = np.cumsum(cluster_sizes)
    loading_start = 0 if cluster_i == 0 else cumsizes[cluster_i-1]
    subset_loading = loading.T.iloc[:,loading_start:cumsizes[cluster_i]]
    
    # plotting
    N = subset_loading.shape[1]
    length = N*.05
    dendro_size = [0,.746,length,.12]
    heatmap_size = [0,.5,length,.25]
    factor_avg_size = [length+.01,.5,.2,.25]
    cbar_size = [length+.22, .5, .05, .25]
    fig = plt.figure(figsize=(size,size*2))
    dendro_ax = fig.add_axes(dendro_size) 
    heatmap_ax = fig.add_axes(heatmap_size)
    factor_avg_ax = fig.add_axes(factor_avg_size)
    cbar_ax = fig.add_axes(cbar_size)
    plot_tree(tree, range(start, end), dendro_ax, linewidth=size/2)
    dendro_ax.set_xticklabels('')
    
    # color scale comes from the full loading matrix, not only this subset
    max_val = np.max(loading.values)
    # if max_val is high, just make it 1
    if max_val > .9:
        max_val = 1
    sns.heatmap(subset_loading, ax=heatmap_ax, 
                cbar=True,
                cbar_ax=cbar_ax,
                cbar_kws={'ticks': [-max_val, 0, max_val]},
                yticklabels=True,
                vmin=-max_val,
                vmax=max_val,
                cmap=colormap,)
    yn, xn = subset_loading.shape
    tick_label_size = size*30/max(yn, 8)
    heatmap_ax.tick_params(labelsize=tick_label_size, length=size*.5, 
                           width=size/5, pad=size)
    heatmap_ax.set_yticklabels(heatmap_ax.get_yticklabels(), rotation=0)
    # columns are labeled 1..N; the names are listed in text_ax below
    heatmap_ax.set_xticks([i+.5 for i in range(0,xn)])
    heatmap_ax.set_xticklabels([str(i) for i in range(1,xn+1)], 
                                size=size*2, rotation=0, ha='center')

    avg_factors = abs(subset_loading).mean(1)
    # format cbar axis
    cbar_ax.set_yticklabels([format_num(-max_val), 0, format_num(max_val)])
    cbar_ax.tick_params(axis='y', length=0)
    cbar_ax.tick_params(labelsize=size*3)
    cbar_ax.set_ylabel('Factor Loading', rotation=-90, fontsize=size*3,
                       labelpad=size*2)
    # add axis labels as text above
    text_ax = fig.add_axes([-.22,.44-.02*N,.4,.02*N]) 
    for spine in ['top','right','bottom','left']:
        text_ax.spines[spine].set_visible(False)
    for i, label in enumerate(subset_loading.columns):
        text_ax.text(0, 1-i/N, str(i+1)+'.', fontsize=size*2.8, ha='right')
        text_ax.text(.1, 1-i/N, label, fontsize=size*3)
    text_ax.tick_params(which='both', labelbottom=False, labelleft=False,
                        bottom=False, left=False)
    # average factor bar; reversed so the bar order follows the heatmap rows
    avg_factors[::-1].plot(kind='barh', ax = factor_avg_ax, width=.7,
                     color= color_list[start])
    factor_avg_ax.set_xlim(0, max_val)
    factor_avg_ax.set_xticklabels('')
    factor_avg_ax.set_yticklabels('')
    factor_avg_ax.tick_params(length=0)
    for spine in ['top','bottom','left','right']:
        factor_avg_ax.spines[spine].set_visible(False)
        
    # title and axes styling of dendrogram
    if title:
        dendro_ax.set_title(title, fontsize=size*3, y=1.05, fontweight='bold')
    dendro_ax.get_yaxis().set_visible(False)
    for spine in ['top','right','bottom','left']:
        dendro_ax.spines[spine].set_visible(False)
    if plot_loc is not None:
        save_figure(fig, plot_loc, {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close()
    else:
        return fig
def plot_communality(results, c, rotate='oblimin', retest_threshold=.2,
                     size=4.6, dpi=300, ext='png', plot_dir=None):
    """ Plots communality as bars and, with retest data, as a density plot
    
    Args:
        results: a results object; its EFA and dataset attributes are read
        c: the number of components, passed to get_communality
        rotate: the rotation, passed to get_communality
        retest_threshold: passed to get_adjusted_communality
        size: scalar that scales the figures, fonts and line widths
        dpi: the final dpi for the images
        ext: the extension for the saved figures
        plot_dir: the directory to save the figures. If none, do not save
        
    Returns nothing. Prints a message and stops if no retest data is found.
    """
    save_kws = {'bbox_inches': 'tight', 'dpi': dpi}
    communality = get_communality(results.EFA, rotate, c)
    # the retest dataset shares the name with 'Complete' swapped for 'Retest'
    retest_name = results.dataset.replace('Complete','Retest')
    retest_data = get_retest_data(dataset=retest_name)
    if retest_data is None:
        print('No retest data found for datafile: %s' % results.dataset)
        return
    
    # put retest rows in communality order, then prettify both indices
    retest_data = retest_data.loc[communality.index]
    communality.index = format_variable_names(communality.index)
    retest_data.index = format_variable_names(retest_data.index)
    has_retest = len(retest_data) > 0
    
    # ----- bar plots -----
    if has_retest:
        adjusted, retest_corr, ceiling = get_adjusted_communality(
                communality, retest_data, retest_threshold)
        fig, axes = plt.subplots(1, 3, figsize=(3*(size/10), size))
        panels = [(communality, True, 'Communality'),
                  (ceiling, False, 'Test-Retest'),
                  (adjusted, False, 'Adjusted Communality')]
        for axis, (values, show_rows, panel_title) in zip(axes, panels):
            plot_bar_factor(values, axis, width=size/10, height=size,
                            label_rows=show_rows, title=panel_title)
    else:
        fig = plot_bar_factor(communality, label_rows=True, 
                              width=size/3, height=size*2, 
                              title='Communality')
    if plot_dir:
        bar_file = 'communality_bars-EFA%s.%s' % (c, ext)
        save_figure(fig, path.join(plot_dir, bar_file), save_kws)
        plt.close()
    
    # the density plot needs the adjusted communality
    if not has_retest:
        return
    
    # ----- density plot -----
    with sns.axes_style('white'):
        colors = sns.color_palette(n_colors=2, desat=.75)
        line_w = size/4
        fig, ax = plt.subplots(1,1,figsize=(size,size))
        curves = [(communality, 'Communality', colors[0]),
                  (adjusted, 'Adjusted Communality', colors[1])]
        for values, curve_label, curve_color in curves:
            sns.kdeplot(values, linewidth=line_w, shade=True,
                        label=curve_label, color=curve_color)
        # dashed line at the mean of each distribution
        y_low, y_high = ax.get_ylim()
        for values, _, curve_color in curves:
            ax.vlines(np.mean(values), y_low, y_high, color=curve_color,
                      linewidth=line_w, linestyle='--')
        legend = ax.legend(fontsize=size*2, loc='upper right')
        beautify_legend(legend, colors)
        plt.xlabel('Communality', fontsize=size*2)
        plt.ylabel('Normalized Density', fontsize=size*2)
        ax.set_yticks([])
        ax.tick_params(labelsize=size)
        ax.set_ylim(0, ax.get_ylim()[1])
        ax.set_xlim(0, ax.get_xlim()[1])
        for side in ('right', 'top'):
            ax.spines[side].set_visible(False)
        # add correlation
        corr_text = format_num(np.mean(retest_corr))
        ax.text(1.1, 1.25, 
                'Correlation Between Communality \nand Test-Retest: %s' % corr_text,
                size=size*2)

    if plot_dir:
        dist_file = 'communality_dist-EFA%s.%s' % (c, ext)
        save_figure(fig, path.join(plot_dir, dist_file), save_kws)
        plt.close()
def plot_heatmap_factors(results, c, size=4.6, thresh=75, rotate='oblimin',
                     DA=False, dpi=300, ext='png', plot_dir=None):
    """ Plots factor analytic results as a heatmap of loadings
    
    Args:
        results: a dimensional structure results object
        c: the number of components to use
        size: scalar - the width of the plot. The figure height is size*2
        thresh: percentile (0-100) of the absolute loadings. Loadings at or 
            below that percentile are set to 0, and variables left with no 
            nonzero loading are dropped. If 0, no thresholding is done
        rotate: the rotation passed to get_loading and reorder_factors
        DA: if True use results.DA rather than results.EFA
        dpi: the final dpi for the image
        ext: the extension for the saved figure
        plot_dir: the directory to save the figure. If none, do not save
    """
    EFA = results.DA if DA else results.EFA
    loadings = EFA.get_loading(c, rotate=rotate)
    loadings = EFA.reorder_factors(loadings, rotate=rotate)           
    # grouping is a sequence of (factor, variables) pairs
    grouping = get_factor_groups(loadings)
    flattened_factor_order = []
    for _, group in grouping:
        flattened_factor_order += group
    loadings = loadings.loc[flattened_factor_order]
    # get threshold for loadings
    if thresh>0:
        thresh_val = np.percentile(abs(loadings).values, thresh)
        print('Thresholding all loadings less than %s' % np.round(thresh_val, 3))
        loadings = loadings.mask(abs(loadings) <= thresh_val, 0)
        # remove variables that don't cross the threshold for any factor.
        # Test for any nonzero loading: a row mean of 0 would also drop a 
        # variable whose positive and negative loadings cancel out
        kept_vars = list(loadings.index[(loadings != 0).any(axis=1)])
        print('%s Variables out of %s are kept after threshold' % (len(kept_vars), loadings.shape[0]))
        loadings = loadings.loc[kept_vars]
        # remove masked variables from grouping
        kept_lookup = set(kept_vars)
        grouping = [[factor, [x for x in group if x in kept_lookup]]
                    for factor, group in grouping]
    # change variable names to make them more readable
    loadings.index = format_variable_names(loadings.index)
    # set up plot variables
    # max(..., 1) avoids dividing by zero when fewer than 2 variables remain
    DV_fontsize = size*2/max(loadings.shape[0]//2, 1)*30
    figsize = (size,size*2)
    
    f = plt.figure(figsize=figsize)
    ax = f.add_axes([0, 0, .08*loadings.shape[1], 1]) 
    cbar_ax = f.add_axes([.08*loadings.shape[1]+.02,0,.04,1]) 

    max_val = abs(loadings).max().max()
    sns.heatmap(loadings, ax=ax, cbar_ax=cbar_ax,
                vmax =  max_val, vmin = -max_val,
                cbar_kws={'ticks': [-max_val, -max_val/2, 0, max_val/2, max_val]},
                linecolor='white', linewidth=.01,
                cmap=sns.diverging_palette(220,15,n=100,as_cmap=True))
    ax.set_yticks(np.arange(.5,loadings.shape[0]+.5,1))
    ax.set_yticklabels(loadings.index, fontsize=DV_fontsize, rotation=0)
    ax.set_xticklabels(loadings.columns, 
                       fontsize=min(size*3, DV_fontsize*1.5),
                       ha='center',
                       rotation=90)
    ax.tick_params(length=size*.5, width=size/10)
    # format cbar; labels line up with the five ticks given to the heatmap
    cbar_ax.set_yticklabels([format_num(-max_val, 2), 
                             format_num(-max_val/2, 2),
                             0, 
                             format_num(max_val/2, 2),
                             format_num(max_val, 2)])
    cbar_ax.tick_params(axis='y', length=0)
    cbar_ax.tick_params(labelsize=DV_fontsize*1.5)
    cbar_ax.set_ylabel('Factor Loading', rotation=-90, fontsize=DV_fontsize*2)
    
    # draw lines separating groups; no line after the last group
    factor_breaks = np.cumsum([len(group) for _, group in grouping])[:-1]
    for y_val in factor_breaks:
        ax.hlines(y_val, 0, loadings.shape[1], lw=size/5, 
                  color='grey', linestyle='dashed')
                
    if plot_dir:
        filename = 'factor_heatmap_EFA%s.%s' % (c, ext)
        save_figure(f, path.join(plot_dir, filename), 
                    {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close()
Exemplo n.º 20
0
# In[ ]:

# Collect, for every (approach, pop_size) group of all_reconstructions, a
# one-column frame of the 'mean' values indexed by 'var'.
tmp = []
for i, group in all_reconstructions.groupby(['approach', 'pop_size']):
    group = group.loc[:, ['var', 'mean']].set_index('var')
    # i is the (approach, pop_size) group key; it becomes the column label
    group.columns = [i]
    tmp.append(group)
# one column per group, aligned on 'var'
approach_compare = pd.concat(tmp, axis=1)
# turn each (approach, pop_size) label into an 'approach: pop_size' string
approach_compare.columns = [
    i + ': ' + str(int(j)) for i, j in approach_compare.columns
]
# correlation of reconstructions
corr = approach_compare.corr(method='spearman')
# mean over the entries strictly below the diagonal, so each pair of columns
# is counted once and the diagonal is left out
overall_correlation = np.mean(corr.values[np.tril_indices_from(corr, -1)])
print('DV reconstruction score correlates %s across approaches' %
      format_num(overall_correlation))

# Model reconstruction success as a function of DV characteristics, approach and subpopulation size

# In[ ]:

# arctanh of 'mean' (the Fisher z-transform, assuming 'mean' holds
# correlation-like values in (-1, 1) - TODO confirm)
all_reconstructions.loc[:, 'z_mean'] = np.arctanh(all_reconstructions['mean'])
# mixed linear model with observations grouped by 'var'; Q('icc3.k') quotes
# the column name because it contains a dot
md = smf.mixedlm(
    "z_mean ~ (pop_size + Q('icc3.k') + communality)*C(approach, Sum)",
    all_reconstructions,
    groups=all_reconstructions["var"])
mdf = md.fit()
mdf.summary()

# other way to do it
# endog, exog = patsy.dmatrices("z_mean ~ (pop_size + icc + avg_correlation)*C(approach, Sum)", all_reconstructions, return_type='dataframe')
Exemplo n.º 21
0
def importance_polar_plots(predictions,
                           target_order=None,
                           show_sign=True,
                           colorbar=True,
                           size=5,
                           dpi=300,
                           filename=None):
    """ Plots a grid of polar importance plots, one row per prediction set

    Each row is one entry of predictions and each column one target. Every
    subplot is drawn by visualize_importance, colored by the target's
    cross-validated R2 relative to the largest R2 found, and annotated with
    that R2.

    Args:
        predictions: dict mapping a name to a dict of per-target outputs.
            Each output is read for 'predvars', 'importances' and
            'scores_cv' (the first element of the latter two is used)
        target_order: targets to plot, in column order. Defaults to the keys
            of the first entry of predictions
        show_sign: passed to visualize_importance
        colorbar: if truthy, add a horizontal R2 colorbar below the grid
        size: scalar that scales the figure and fonts
        dpi: the final dpi for the image
        filename: if set, where to save the plot

    Returns:
        The figure if filename is None, otherwise None (the figure is saved
        and closed)
    """
    # set up color styling
    palette = sns.color_palette('Blues_d', 100)
    # plot
    if target_order is None:
        target_order = list(list(predictions.values())[0].keys())
    N = len(target_order)
    f = plt.figure(figsize=(size, size))
    background_ax = f.add_axes([0, 0, 1, 1])
    polar_axes = []
    subplot_size = 1 / N
    # get max r2
    max_r2 = 0
    for prediction in predictions.values():
        vals = [prediction[i] for i in target_order]
        max_r2 = max(max_r2, max([i['scores_cv'][0]['R2'] for i in vals]))
    # axes are appended row by row, so the axis of row r and column i sits
    # at polar_axes[r * N + i]
    for row_i, (name, prediction) in enumerate(predictions.items()):
        # get importances
        vals = [prediction[i] for i in target_order]
        importances = [(i['predvars'], i['importances'][0]) for i in vals]
        r2s = [i['scores_cv'][0]['R2'] for i in vals]
        for i, target in enumerate(target_order):
            polar_ax = f.add_axes([
                subplot_size * i * 1.3, row_i * 1.4 * subplot_size,
                subplot_size, subplot_size
            ],
                                  projection='polar')
            polar_axes.append(polar_ax)
            # scale R2 onto the palette. Without a positive R2 there is
            # nothing to scale by, so fall back to the first color instead
            # of dividing by zero
            if max_r2 > 0:
                color_i = max(int(r2s[i] / max_r2 * len(palette)) - 1, 0)
            else:
                color_i = 0
            visualize_importance(importances[i],
                                 polar_ax,
                                 yticklabels=False,
                                 xticklabels=True,
                                 label_size=size * 1.5,
                                 color=palette[color_i],
                                 outline_color='k',
                                 axes_linewidth=size / 20,
                                 label_scale=.25,
                                 show_sign=show_sign)
            polar_ax.text(.5,
                          -.2,
                          'R2: ' + format_num(r2s[i]),
                          zorder=5,
                          fontsize=size * 1.5,
                          fontweight='bold',
                          ha='center',
                          transform=polar_ax.transAxes)
            # change axis color
            polar_ax.grid(color=[.6, .6, .6])
            polar_ax.set_facecolor((0.91, 0.91, 0.94, 1.0))
    # add column labels above the last row added, which is the highest row
    # in the figure. i - N indexes that row for any number of targets (the
    # previous hard-coded i - 3 was only right for three targets)
    for i, label in enumerate(target_order):
        pos = polar_axes[i - N].get_position().bounds
        x_pos = pos[0] + pos[2] * .5
        y_pos = pos[1] + pos[3]
        background_ax.text(x_pos,
                           y_pos + .05,
                           '\n'.join(label.split()),
                           fontsize=size * 2,
                           fontweight='bold',
                           ha='center')
    # add row labels next to the first axis of each row
    for i, key in enumerate(predictions.keys()):
        pos = polar_axes[i * N].get_position().bounds
        x_pos = pos[0]
        y_pos = pos[1] + pos[3] * .5
        background_ax.text(x_pos - .1,
                           y_pos,
                           ' '.join(key.title().split('_')),
                           fontsize=size * 2,
                           fontweight='bold',
                           va='center',
                           rotation=90)
    # make background ax invisible
    background_ax.tick_params(bottom=False,
                              left=False,
                              labelbottom=False,
                              labelleft=False)
    # add colorbar
    if colorbar:
        # get x position of center plots
        if N % 2 == 1:
            pos = polar_axes[N // 2].get_position().bounds
            x_pos = pos[0] + pos[2] * .5
        else:
            # even number of columns: center on the gap between the two
            # middle axes (right edge of the left one, left edge of the
            # right one)
            pos1 = polar_axes[N // 2 - 1].get_position().bounds
            pos2 = polar_axes[N // 2].get_position().bounds
            x_pos = (pos1[0] + pos1[2] + pos2[0]) / 2

        color_ax = f.add_axes([x_pos - .3, -.2, .6, .025])
        cbar = mpl.colorbar.ColorbarBase(ax=color_ax,
                                         cmap=ListedColormap(palette),
                                         orientation='horizontal')
        cbar.set_ticks([0, 1])
        cbar.set_ticklabels([0, format_num(max_r2)])
        color_ax.tick_params(labelsize=size)
        cbar.set_label('R2', fontsize=size * 1.5)
    for spine in background_ax.spines.values():
        spine.set_visible(False)
    if filename is not None:
        save_figure(f, filename, {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close()
    else:
        return f
def plot_dendrogram(loading, clustering, title=None, 
                    break_lines=True, drop_list=None, double_drop_list=None,
                    absolute_loading=False,  size=4.6,  dpi=300, 
                    filename=None):
    """ Draws an HCA dendrogram with the loading heatmap underneath
    
    Args:
        loading: pandas df, a results EFA loading matrix
        clustering: a results HCA clustering. The keys 'linkage', 
            'clustered_df', 'labels' and 'reorder_vec' are read, plus 
            'cluster_names' when present
        title (optional): str, title to plot
        break_lines: whether to separate EFA heatmap based on clusters, default=True
        drop_list (optional): list of cluster indices whose cluster label is dropped lower
        double_drop_list (optional): list of cluster indices whose cluster 
            label is dropped lower still; takes precedence over drop_list
        absolute_loading: whether to plot the absolute loading value, default False
        size: scalar that scales the figure, fonts and line widths
        dpi: the final dpi for the image
        filename: if set, where to save the plot
        
    Returns:
        The figure if filename is None, otherwise None (the figure is saved
        and closed)
    """
    n_factors = loading.shape[1]
    # extract cluster vars
    linkage = clustering['linkage']
    clustered_vars = clustering['clustered_df'].columns
    ordered_loading = loading.loc[clustered_vars]
    if absolute_loading:
        ordered_loading = abs(ordered_loading)
    # number of variables carrying each cluster label 1..max(labels)
    labels = clustering['labels']
    cluster_sizes = [np.sum(labels==(k+1)) for k in range(max(labels))]
    link_function, colors = get_dendrogram_color_fun(
            linkage, clustering['reorder_vec'], labels)
    
    # axes rectangles: the heatmap height grows with the number of factors
    # and the dendrogram sits directly on top of the heatmap
    heatmap_height = ordered_loading.shape[1]*.035
    heatmap_bottom = .1
    dendro_bottom = np.sum([heatmap_bottom, heatmap_height])
    dendro_rect = [.15, dendro_bottom, .78, .3]
    heatmap_rect = [.15, heatmap_bottom, .78, heatmap_height]
    cbar_rect = [.935, heatmap_bottom, .015, heatmap_height]
    ordered_loading = ordered_loading.T
    all_sides = ('top', 'right', 'bottom', 'left')

    with sns.axes_style('white'):
        fig = plt.figure(figsize=(size, size*.6))
        dendro_ax = fig.add_axes(dendro_rect) 
        # ---------- dendrogram ----------
        with plt.rc_context({'lines.linewidth': size*.125}):
            dendrogram(linkage, ax=dendro_ax, link_color_func=link_function,
                       orientation='top')
        # strip the dendrogram axis down to the tree itself
        dendro_ax.tick_params(axis='x', which='major', labelsize=14,
                              labelbottom=False)
        dendro_ax.get_yaxis().set_visible(False)
        for side in all_sides:
            dendro_ax.spines[side].set_visible(False)
        # ---------- loading heatmap ----------
        heat_ax = fig.add_axes(heatmap_rect)
        cbar_ax = fig.add_axes(cbar_rect)
        # color limit: largest absolute loading rounded up to a multiple of .25
        max_val = ceil(np.max(abs(loading.values))*4)/4
        sns.heatmap(ordered_loading, ax=heat_ax, 
                    cbar=True, cbar_ax=cbar_ax,
                    yticklabels=True,
                    xticklabels=True,
                    vmax =  max_val, vmin = -max_val,
                    cbar_kws={'orientation': 'vertical',
                              'ticks': [-max_val, 0, max_val]},
                    cmap=sns.diverging_palette(220,15,n=100,as_cmap=True))
        heat_ax.set_yticklabels(heat_ax.get_yticklabels(), rotation=0)
        factor_fontsize = size*heatmap_height*30/n_factors
        heat_ax.tick_params(axis='y', labelsize=factor_fontsize, 
                            pad=size/4, length=0)            
        # colorbar ticks and label
        cbar_ax.set_yticklabels([format_num(-max_val), 0, format_num(max_val)])
        cbar_ax.tick_params(labelsize=size*heatmap_height*25/n_factors, 
                            length=0, pad=size/2)
        cbar_ax.set_ylabel('Factor Loading', rotation=-90, 
                           fontsize=factor_fontsize, labelpad=size*2)
        # dashed vertical lines between clusters
        if break_lines == True:
            xlim = heat_ax.get_xlim()
            ylim = heat_ax.get_ylim()
            step = xlim[1]/len(labels)
            cluster_breaks = [n*step for n in np.cumsum(cluster_sizes)]
            heat_ax.vlines(cluster_breaks[:-1], ylim[0], ylim[1], 
                           linestyles='dashed', linewidth=size*.1, 
                           colors=[.5,.5,.5], zorder=10)
        # ---------- cluster names ----------
        beginnings = np.hstack([[0],np.cumsum(cluster_sizes)[:-1]])
        centers = beginnings+np.array(cluster_sizes)//2+.5
        offset = .07
        if 'cluster_names' in clustering.keys():
            heat_ax.tick_params(axis='x', reset=True, top=False, 
                                bottom=False, width=size/8, length=0)
            names = [transform_name(n) for n in clustering['cluster_names']]
            heat_ax.set_xticks(centers)
            heat_ax.set_xticklabels(names, rotation=0, ha='center', 
                                    fontsize=.78*size)
            ticks = heat_ax.xaxis.get_ticklines()[::2]
            bracket_y = n_factors+offset
            for i, label in enumerate(heat_ax.get_xticklabels()):
                # clusters without a name get no bracket
                if label.get_text() == '':
                    continue
                cluster_color = colors[i]
                left = beginnings[i]
                width = cluster_sizes[i]
                # horizontal bracket spanning the cluster
                heat_ax.hlines(bracket_y, left+.5, left+width-.5, 
                               clip_on=False, color=cluster_color, 
                               linewidth=size/5)
                label.set_color(cluster_color)
                ticks[i].set_color(cluster_color)
                # how far the label and its connecting line drop
                y_drop, line_drop = .005, .3
                if drop_list and i in drop_list:
                    y_drop, line_drop = .05, 1.6
                if double_drop_list and i in double_drop_list:
                    y_drop, line_drop = .1, 2.9
                label.set_y(-(y_drop/heatmap_height+heatmap_height/n_factors*offset))
                # vertical line from the bracket towards the label
                heat_ax.vlines(left+width/2, 
                               bracket_y, bracket_y+line_drop,
                               clip_on=False, color=cluster_color, 
                               linewidth=size/7.5)

        # add title
        if title:
            dendro_ax.set_title(title, fontsize=size*2, y=1.05)
            
    if filename is None:
        return fig
    save_figure(fig, filename,
                {'bbox_inches': 'tight', 'dpi': dpi})
    plt.close()
Exemplo n.º 23
0
def plot_communality(results,
                     c,
                     rotate='oblimin',
                     retest_threshold=.2,
                     size=4.6,
                     dpi=300,
                     ext='png',
                     plot_dir=None):
    """ Plots communality as bars and, with retest data, as a density plot

    Args:
        results: a results object; its EFA and dataset attributes are read
        c: the number of components, passed to get_communality
        rotate: the rotation, passed to get_communality
        retest_threshold: passed to get_adjusted_communality
        size: scalar that scales the figures, fonts and line widths
        dpi: the final dpi for the images
        ext: the extension for the saved figures
        plot_dir: the directory to save the figures. If none, do not save

    Returns nothing. Prints a message and stops if no retest data is found.
    """
    save_kws = {'bbox_inches': 'tight', 'dpi': dpi}
    communality = get_communality(results.EFA, rotate, c)
    # the retest dataset shares the name with 'Complete' swapped for 'Retest'
    retest_name = results.dataset.replace('Complete', 'Retest')
    retest_data = get_retest_data(dataset=retest_name)
    if retest_data is None:
        print('No retest data found for datafile: %s' % results.dataset)
        return

    # put retest rows in communality order, then prettify both indices
    retest_data = retest_data.loc[communality.index]
    communality.index = format_variable_names(communality.index)
    retest_data.index = format_variable_names(retest_data.index)
    has_retest = len(retest_data) > 0

    # ----- bar plots -----
    if has_retest:
        adjusted, retest_corr, ceiling = get_adjusted_communality(
            communality, retest_data, retest_threshold)
        fig, axes = plt.subplots(1, 3, figsize=(3 * (size / 10), size))
        panels = [(communality, True, 'Communality'),
                  (ceiling, False, 'Test-Retest'),
                  (adjusted, False, 'Adjusted Communality')]
        for axis, (values, show_rows, panel_title) in zip(axes, panels):
            plot_bar_factor(values, axis, width=size / 10, height=size,
                            label_rows=show_rows, title=panel_title)
    else:
        fig = plot_bar_factor(communality, label_rows=True,
                              width=size / 3, height=size * 2,
                              title='Communality')
    if plot_dir:
        bar_file = 'communality_bars-EFA%s.%s' % (c, ext)
        save_figure(fig, path.join(plot_dir, bar_file), save_kws)
        plt.close()

    # the density plot needs the adjusted communality
    if not has_retest:
        return

    # ----- density plot -----
    with sns.axes_style('white'):
        colors = sns.color_palette(n_colors=2, desat=.75)
        line_w = size / 4
        fig, ax = plt.subplots(1, 1, figsize=(size, size))
        curves = [(communality, 'Communality', colors[0]),
                  (adjusted, 'Adjusted Communality', colors[1])]
        for values, curve_label, curve_color in curves:
            sns.kdeplot(values, linewidth=line_w, shade=True,
                        label=curve_label, color=curve_color)
        # dashed line at the mean of each distribution
        y_low, y_high = ax.get_ylim()
        for values, _, curve_color in curves:
            ax.vlines(np.mean(values), y_low, y_high, color=curve_color,
                      linewidth=line_w, linestyle='--')
        legend = ax.legend(fontsize=size * 2, loc='upper right')
        beautify_legend(legend, colors)
        plt.xlabel('Communality', fontsize=size * 2)
        plt.ylabel('Normalized Density', fontsize=size * 2)
        ax.set_yticks([])
        ax.tick_params(labelsize=size)
        ax.set_ylim(0, ax.get_ylim()[1])
        ax.set_xlim(0, ax.get_xlim()[1])
        for side in ('right', 'top'):
            ax.spines[side].set_visible(False)
        # add correlation
        corr_text = format_num(np.mean(retest_corr))
        ax.text(1.1, 1.25,
                'Correlation Between Communality \nand Test-Retest: %s' %
                corr_text,
                size=size * 2)

    if plot_dir:
        dist_file = 'communality_dist-EFA%s.%s' % (c, ext)
        save_figure(fig, path.join(plot_dir, dist_file), save_kws)
        plt.close()
 # multiply each row of loading_data by the matching entry of reflects
 loading_data = loading_data.multiply(reflects, axis=0)
 # plot loadings
 # rows are passed in reversed order; the colormap is symmetric around 0
 # (vmin = -max_val, vmax = max_val)
 sns.heatmap(loading_data.iloc[::-1, :],
             ax=loading_axes[task_i],
             yticklabels=False,
             xticklabels=False,
             linecolor='white',
             linewidth=basewidth,
             cbar_ax=cbar_ax,
             vmax=max_val,
             vmin=-max_val,
             cbar_kws={'ticks': [-max_val, 0, max_val]},
             cmap=sns.diverging_palette(220, 16, n=100, as_cmap=True))
 # format cbar
 cbar_ax.set_yticklabels(
     [format_num(-max_val, 1), 0,
      format_num(max_val, 1)])
 cbar_ax.tick_params(axis='y', length=0)
 cbar_ax.tick_params(labelsize=basefont)
 # draw a white strip of height .2 ending at each row boundary i, slightly
 # wider than the heatmap (clip_on=False lets it extend past the axes)
 for i in range(1, loading_data.shape[0] + 1):
     #loading_axes[task_i].hlines(i, -.2, 6.1, color='white', linewidth=basewidth*3)
     loading_axes[task_i].add_patch(
         Rectangle([-.1, i - .2],
                   width=loading_data.shape[1] + .2,
                   height=.2,
                   zorder=100,
                   facecolor='white',
                   edgecolor='white',
                   linewidth=basewidth,
                   clip_on=False))
 # add boxes
Exemplo n.º 25
0
def plot_heatmap_factors(results,
                         c,
                         size=4.6,
                         thresh=75,
                         rotate='oblimin',
                         DA=False,
                         dpi=300,
                         ext='png',
                         plot_dir=None):
    """ Plots factor loadings as a heatmap (variables x factors)
    
    Variables are ordered by the factor groups returned by
    get_factor_groups, and dashed horizontal lines separate the groups.
    
    Args:
        results: a dimensional structure results object
        c: the number of components to use
        size: scalar - the width of the plot. The figure height is 2*size
        thresh: percentile (0-100) of the absolute loadings. Loadings at or
            below that percentile are set to 0, and variables left with no
            nonzero loading are dropped. If 0 (or less), no thresholding
        rotate: rotation passed to the loading/reordering methods
        DA: if True use results.DA, otherwise use results.EFA
        dpi: the final dpi for the image
        ext: the extension for the saved figure
        plot_dir: the directory to save the figure. If none, do not save
    """
    EFA = results.DA if DA else results.EFA
    loadings = EFA.get_loading(c, rotate=rotate)
    loadings = EFA.reorder_factors(loadings, rotate=rotate)
    grouping = get_factor_groups(loadings)
    # order the rows so that each factor group is contiguous
    flattened_factor_order = [
        var for _, group in grouping for var in group
    ]
    loadings = loadings.loc[flattened_factor_order]
    # get threshold for loadings
    if thresh > 0:
        thresh_val = np.percentile(abs(loadings).values, thresh)
        print('Thresholding all loadings less than %s' %
              np.round(thresh_val, 3))
        loadings = loadings.mask(abs(loadings) <= thresh_val, 0)
        # remove variables that don't cross the threshold for any factor.
        # Test for any nonzero entry rather than a nonzero row mean: loadings
        # of opposite sign can average to exactly 0 and would be dropped
        n_vars = loadings.shape[0]
        kept_vars = list(loadings.index[(loadings != 0).any(axis=1)])
        print('%s Variables out of %s are kept after threshold' %
              (len(kept_vars), n_vars))
        loadings = loadings.loc[kept_vars]
        # remove masked variables from grouping
        kept_set = set(kept_vars)
        grouping = [[factor, [x for x in group if x in kept_set]]
                    for factor, group in grouping]
    # change variable names to make them more readable
    loadings.index = format_variable_names(loadings.index)
    # set up plot variables
    # font size shrinks as the number of rows grows; the max() guards against
    # a zero divisor when fewer than two variables remain
    DV_fontsize = size * 2 / max(loadings.shape[0] // 2, 1) * 30
    figsize = (size, size * 2)

    f = plt.figure(figsize=figsize)
    # heatmap axis width scales with the number of factors; the colorbar
    # axis sits just to its right
    ax = f.add_axes([0, 0, .08 * loadings.shape[1], 1])
    cbar_ax = f.add_axes([.08 * loadings.shape[1] + .02, 0, .04, 1])

    # symmetric color limits so that 0 maps to the center of the palette
    max_val = abs(loadings).max().max()
    sns.heatmap(
        loadings,
        ax=ax,
        cbar_ax=cbar_ax,
        vmax=max_val,
        vmin=-max_val,
        cbar_kws={'ticks': [-max_val, -max_val / 2, 0, max_val / 2, max_val]},
        linecolor='white',
        linewidth=.01,
        cmap=sns.diverging_palette(220, 15, n=100, as_cmap=True))
    ax.set_yticks(np.arange(.5, loadings.shape[0] + .5, 1))
    ax.set_yticklabels(loadings.index, fontsize=DV_fontsize, rotation=0)
    ax.set_xticklabels(loadings.columns,
                       fontsize=min(size * 3, DV_fontsize * 1.5),
                       ha='center',
                       rotation=90)
    ax.tick_params(length=size * .5, width=size / 10)
    # format cbar; labels must match the tick positions given to sns.heatmap
    cbar_ax.set_yticklabels([
        format_num(-max_val, 2),
        format_num(-max_val / 2, 2), 0,
        format_num(max_val / 2, 2),
        format_num(max_val, 2)
    ])
    cbar_ax.tick_params(axis='y', length=0)
    cbar_ax.tick_params(labelsize=DV_fontsize * 1.5)
    cbar_ax.set_ylabel('Factor Loading',
                       rotation=-90,
                       fontsize=DV_fontsize * 2)

    # draw lines separating groups
    if grouping is not None:
        # cumulative group sizes give the row index where each group ends;
        # the last one is the bottom of the plot and needs no line
        factor_breaks = np.cumsum([len(i[1]) for i in grouping])[:-1]
        for y_val in factor_breaks:
            ax.hlines(y_val,
                      0,
                      loadings.shape[1],
                      lw=size / 5,
                      color='grey',
                      linestyle='dashed')

    if plot_dir:
        filename = 'factor_heatmap_EFA%s.%s' % (c, ext)
        save_figure(f, path.join(plot_dir, filename), {
            'bbox_inches': 'tight',
            'dpi': dpi
        })
        # close the figure created here rather than whichever is current
        plt.close(f)