def get_task_representations(self, tasks, c=None, rotate='oblimin'):
     """Take a list of tasks and reconstructs factor scores"""   
     if c is None:
         c = self.get_c()
         print('# of components not specified, using BIC determined #')         
     fa_output = self.results['factor_tree_Rout_%s' % rotate][c]
     output = {'weights': get_attr(fa_output, 'weights'),
               'scores': get_attr(fa_output, 'scores')}
     subset_scores, r2_scores = get_scores_from_subset(self.data,
                                                       output,
                                                       tasks)
     return subset_scores, r2_scores
Exemplo n.º 2
0
 def get_task_representations(self, tasks, c=None, rotate='oblimin'):
     """Take a list of tasks and reconstructs factor scores"""   
     if c is None:
         c = self.get_c()
         print('# of components not specified, using BIC determined #')         
     fa_output = self.results['factor_tree_Rout_%s' % rotate][c]
     output = {'weights': get_attr(fa_output, 'weights'),
               'scores': get_attr(fa_output, 'scores')}
     subset_scores, r2_scores = get_scores_from_subset(self.data,
                                                       output,
                                                       tasks)
     return subset_scores, r2_scores
Exemplo n.º 3
0
 def get_change(self, retest_dataset):
     demographics = self.data
     
     retest = get_demographics(retest_dataset)
     retest = residualize_baseline(retest, self.residualize_vars)
     if 'BMI' in retest.columns:
         retest.drop(['WeightPounds', 'HeightInches'], axis=1, inplace=True)
     # get common variables
     common_index = sorted(list(set(demographics.index) & set(retest.index)))
     common_columns = sorted(list(set(demographics.columns) & set(retest.columns)))
     demographics = demographics.loc[common_index, common_columns] 
     retest = retest.loc[common_index, common_columns]
     raw_change = retest-demographics
     # convert to scores
     c = self.get_c()
     demographic_factor_weights = get_attr(self.results['factor_tree_Rout_oblimin'][c],'weights')
     demographic_scores = scale(demographics).dot(demographic_factor_weights)
     retest_scores = scale(retest).dot(demographic_factor_weights)
     
     
     factor_change = pd.DataFrame(retest_scores-demographic_scores,
                           index=common_index,
                           columns = self.get_scores().columns)
     factor_change = self.reorder_factors(factor_change)
     factor_change.columns = [i + ' Change' for i in factor_change.columns]
     return factor_change, raw_change
Exemplo n.º 4
0
 def get_factor_reorder(self, c, rotate='oblimin'):
     # reorder factors based on correlation matrix
     phi=get_attr(self.results['factor_tree_Rout_%s' % rotate][c],'Phi')
     if phi is None:
         return list(range(c))
     new_order = list(leaves_list(linkage(squareform(np.round(1-phi,3)))))
     return new_order[::-1] # reversing because it works better for task EFA
Exemplo n.º 5
0
def transfer_scores(data, results, rotate='oblimin'):
    """ calculates factor scores in a new dataset based on a reference results object """
    ref_data = results.data
    EFA = results.EFA
    c = EFA.results['num_factors']
    loadings = EFA.get_loading(c=c, rotate=rotate)
    # transform data
    positive_skewed = [
        i.replace('.logTr', '') for i in ref_data.columns if ".logTr" in i
    ]
    negative_skewed = [
        i.replace('.ReflogTr', '') for i in ref_data.columns
        if ".ReflogTr" in i
    ]
    DVs = [
        i.replace('.logTr', '').replace('.ReflogTr', '')
        for i in ref_data.columns
    ]
    data = data.loc[:, DVs]
    print('using correct transfer_scores')
    data = transform_remove_skew(data,
                                 positive_skewed=positive_skewed,
                                 negative_skewed=negative_skewed,
                                 drop_failed=False)
    data = remove_outliers(data)
    data_imputed, error = missForest(data)
    subset = data_imputed.loc[:, loadings.index]
    scaled_data = scale(subset)
    # calculate scores
    weights = get_attr(EFA.results['factor_tree_Rout_%s' % rotate][c],
                       'weights')
    scores = pd.DataFrame(scaled_data.dot(weights),
                          index=data_imputed.index,
                          columns=loadings.columns)
    return scores
 def compute_higher_order_factors(self, c=None, rotate='oblimin'):
     """ Return higher order EFA """
     if c is None:
         c = self.get_c()
         print('# of components not specified, using BIC determined #')
     if ('factor_tree_%s' % rotate in self.results.keys() and 
         c in self.results['factor_tree_Rout_%s' % rotate].keys()):
         # get factor correlation matrix
         scores = get_attr(self.results['factor_tree_Rout_%s' % rotate][c], 'scores')
         phi = pd.DataFrame(np.corrcoef(scores.T))
         # check for correlations
         if np.mean(np.tril(phi, -1)) < 10E-5:
             return
         n_obs = self.data.shape[0]
         labels = list(self.results['factor_tree_%s' % rotate][c].columns)
         BIC_c, BICs = find_optimal_components(phi, 
                                               metric='BIC', 
                                               nobs=n_obs)
         if BIC_c != 0:
             if 'factor2_tree_%s' % rotate not in self.results.keys():
                 self.results['factor2_tree_%s' % rotate] = {}
                 self.results['factor2_tree_Rout_%s' % rotate] = {}
             Rout, higher_order_out = psychFA(phi, BIC_c, nobs=n_obs)
             loadings = get_loadings(higher_order_out, labels)
             self.results['factor2_tree_%s' % rotate][c] = loadings
             self.results['factor2_tree_Rout_%s' % rotate][c] = Rout
         else:
             print('Higher order factors could not be calculated')
     else:
         print('No %s factor solution computed yet!' % c)
 def get_factor_reorder(self, c, rotate='oblimin'):
     # reorder factors based on correlation matrix
     phi=get_attr(self.results['factor_tree_Rout_%s' % rotate][c],'Phi')
     if phi is None:
         return list(range(c))
     new_order = list(leaves_list(linkage(squareform(np.round(1-phi,3)))))
     return new_order[::-1] # reversing because it works better for task EFA
 def get_change(self, retest_dataset):
     demographics = self.data
     
     retest = get_demographics(retest_dataset)
     retest = residualize_baseline(retest, self.residualize_vars)
     if 'BMI' in retest.columns:
         retest.drop(['WeightPounds', 'HeightInches'], axis=1, inplace=True)
     # get common variables
     common_index = sorted(list(set(demographics.index) & set(retest.index)))
     common_columns = sorted(list(set(demographics.columns) & set(retest.columns)))
     demographics = demographics.loc[common_index, common_columns] 
     retest = retest.loc[common_index, common_columns]
     raw_change = retest-demographics
     # convert to scores
     c = self.get_c()
     demographic_factor_weights = get_attr(self.results['factor_tree_Rout_oblimin'][c],'weights')
     demographic_scores = scale(demographics).dot(demographic_factor_weights)
     retest_scores = scale(retest).dot(demographic_factor_weights)
     
     
     factor_change = pd.DataFrame(retest_scores-demographic_scores,
                           index=common_index,
                           columns = self.get_scores().columns)
     factor_change = self.reorder_factors(factor_change)
     factor_change.columns = [i + ' Change' for i in factor_change.columns]
     return factor_change, raw_change
Exemplo n.º 9
0
 def compute_higher_order_factors(self, c=None, rotate='oblimin'):
     """ Return higher order EFA """
     if c is None:
         c = self.get_c()
         print('# of components not specified, using BIC determined #')
     if ('factor_tree_%s' % rotate in self.results.keys() and 
         c in self.results['factor_tree_Rout_%s' % rotate].keys()):
         # get factor correlation matrix
         scores = get_attr(self.results['factor_tree_Rout_%s' % rotate][c], 'scores')
         phi = pd.DataFrame(np.corrcoef(scores.T))
         # check for correlations
         if np.mean(np.tril(phi, -1)) < 10E-5:
             return
         n_obs = self.data.shape[0]
         labels = list(self.results['factor_tree_%s' % rotate][c].columns)
         BIC_c, BICs = find_optimal_components(phi, 
                                               metric='BIC', 
                                               nobs=n_obs)
         if BIC_c != 0:
             if 'factor2_tree_%s' % rotate not in self.results.keys():
                 self.results['factor2_tree_%s' % rotate] = {}
                 self.results['factor2_tree_Rout_%s' % rotate] = {}
             Rout, higher_order_out = psychFA(phi, BIC_c, nobs=n_obs)
             loadings = get_loadings(higher_order_out, labels)
             self.results['factor2_tree_%s' % rotate][c] = loadings
             self.results['factor2_tree_Rout_%s' % rotate][c] = Rout
         else:
             print('Higher order factors could not be calculated')
     else:
         print('No %s factor solution computed yet!' % c)
 def get_boot_stats(self, c=None, rotate='oblimin'):
     if c is None:
         c = self.get_c()
         print('# of components not specified, using BIC determined #')
     if c in self.results['factor_tree_Rout_%s' % rotate].keys():
         bootstrap_Rout = self.results['factor_tree_Rout_%s' % rotate][c]
         if 'cis' in bootstrap_Rout.names:
             loadings = self.get_loading(c, rotate=rotate)
             bootstrap_stats = get_attr(bootstrap_Rout, 'cis')
             means = pd.DataFrame(get_attr(bootstrap_stats,'means'), 
                                  index=loadings.index,
                                  columns=loadings.columns)
             sds = pd.DataFrame(get_attr(bootstrap_stats,'sds'), 
                                  index=loadings.index,
                                  columns=loadings.columns)
             return {'means': means, 'sds': sds}
         else:
             print('No bootstrap has been run for EFA with %s factors' % c)
             return None
     else:
         print("EFA hasn't been run for %s factors" % c)
         return None
def get_communality(EFA, rotate='oblimin', c=None):
    if c is None:
        c = EFA.get_c()
    loading = EFA.get_loading(c, rotate=rotate)
    # get communality from psych out
    fa = EFA.results['factor_tree_Rout_%s' % rotate][c]
    communality = get_attr(fa, 'communalities')
    communality = pd.Series(communality, index=loading.index)
    # alternative calculation
    #communality = (loading**2).sum(1).sort_values()
    communality.index = [i.replace('.logTr','').replace('.ReflogTr','') for i in communality.index]
    communality.name = "communality"
    return communality
Exemplo n.º 12
0
 def get_boot_stats(self, c=None, rotate='oblimin'):
     if c is None:
         c = self.get_c()
         print('# of components not specified, using BIC determined #')
     if c in self.results['factor_tree_Rout_%s' % rotate].keys():
         bootstrap_Rout = self.results['factor_tree_Rout_%s' % rotate][c]
         if 'cis' in bootstrap_Rout.names:
             loadings = self.get_loading(c, rotate=rotate)
             bootstrap_stats = get_attr(bootstrap_Rout, 'cis')
             means = pd.DataFrame(get_attr(bootstrap_stats,'means'), 
                                  index=loadings.index,
                                  columns=loadings.columns)
             sds = pd.DataFrame(get_attr(bootstrap_stats,'sds'), 
                                  index=loadings.index,
                                  columns=loadings.columns)
             return {'means': means, 'sds': sds}
         else:
             print('No bootstrap has been run for EFA with %s factors' % c)
             return None
     else:
         print("EFA hasn't been run for %s factors" % c)
         return None
def calc_EFA_retest_held_out(results, rotate='oblimin', verbose=True):
    name = results.ID.split('_')[0].title()
    orig_data = results.data
    positive_skewed = [i.replace('.logTr', '') for i in orig_data.columns if ".logTr" in i]
    negative_skewed = [i.replace('.ReflogTr', '') for i in orig_data.columns if ".ReflogTr" in i]
    DVs = [i.replace('.logTr','').replace('.ReflogTr','') for i in orig_data.columns]
    orig_scores = results.EFA.get_scores(rotate=rotate)
    
    # load and clean retest data exactly like original data
    data_raw = get_behav_data(dataset=results.dataset,
                              file='meaningful_variables.csv')
    retest_data_raw = get_behav_data(dataset=results.dataset.replace('Complete','Retest'),
                                     file='meaningful_variables.csv')
    shared_ids = set(retest_data_raw.index) & set(data_raw.index)
    data_raw = data_raw.loc[shared_ids, :]
    retest_data_raw = retest_data_raw.loc[shared_ids, :]
    raw_data = {'T1': data_raw, 'T2': retest_data_raw}
    imputed_data = {}
    for name, data in raw_data.items():  
        tmp_data = data.loc[:, DVs]
        tmp_data = transform_remove_skew(tmp_data,
                                         positive_skewed=positive_skewed,
                                         negative_skewed=negative_skewed)
        tmp_data = remove_outliers(tmp_data)
        tmp_data_imputed, error = missForest(tmp_data)  
        scaled_tmp_data = scale(tmp_data_imputed)
        imputed_data[name] = scaled_tmp_data
    
    # get subjects not in the retest set
    ind_data = orig_data.loc[set(orig_data.index)-shared_ids]
    fa, output = psychFA(ind_data, results.EFA.results['num_factors'], 
                         method='ml', rotate=rotate)
    weights = get_attr(fa, 'weights')
    scores = {}
    for name, data in imputed_data.items():
        suffix=''
        if name=='T2': suffix='T2'
        tmp_scores = pd.DataFrame(data.dot(weights),
                                  index=shared_ids,
                                  columns=[i+' '+suffix for i in orig_scores.columns])
        scores[name] = tmp_scores
    combined = pd.concat([scores['T1'], scores['T2']], axis=1)
    cross_diag = [combined.corr().iloc[i,i+len(orig_scores.columns)] 
                    for i in range(len(orig_scores.columns))]
        # get ICCs
    ICCs = []
    for col in scores['T1'].columns:
        tmp = combined.filter(regex=col)
        out = psych.ICC(tmp)
        ICCs.append(list(out[0][1])[-1])
    return combined, cross_diag, ICCs, (fa, output)
Exemplo n.º 14
0
def get_communality(EFA, rotate='oblimin', c=None):
    if c is None:
        c = EFA.get_c()
    loading = EFA.get_loading(c, rotate=rotate)
    # get communality from psych out
    fa = EFA.results['factor_tree_Rout_%s' % rotate][c]
    communality = get_attr(fa, 'communalities')
    communality = pd.Series(communality, index=loading.index)
    # alternative calculation
    #communality = (loading**2).sum(1).sort_values()
    communality.index = [
        i.replace('.logTr', '').replace('.ReflogTr', '')
        for i in communality.index
    ]
    communality.name = "communality"
    return communality
def plot_factor_correlation(results, c, rotate='oblimin', title=True,
                            DA=False, size=4.6, dpi=300, ext='png', plot_dir=None):
    if DA:
        EFA = results.DA
    else:
        EFA = results.EFA
    loading = EFA.get_loading(c, rotate=rotate)
    # get factor correlation matrix
    reorder_vec = EFA.get_factor_reorder(c)
    phi = get_attr(EFA.results['factor_tree_Rout_%s' % rotate][c],'Phi')
    phi = pd.DataFrame(phi, columns=loading.columns, index=loading.columns)
    phi = phi.iloc[reorder_vec, reorder_vec]
    mask = np.zeros_like(phi)
    mask[np.tril_indices_from(mask, -1)] = True
    with sns.plotting_context('notebook', font_scale=2) and sns.axes_style('white'):
        f = plt.figure(figsize=(size*5/4, size))
        ax1 = f.add_axes([0,0,.9,.9])
        cbar_ax = f.add_axes([.91, .05, .03, .8])
        sns.heatmap(phi, ax=ax1, square=True, vmax=1, vmin=-1,
                    cbar_ax=cbar_ax, 
                    cmap=sns.diverging_palette(220,15,n=100,as_cmap=True))
        sns.heatmap(phi, ax=ax1, square=True, vmax=1, vmin=-1,
                    cbar_ax=cbar_ax, annot=True, annot_kws={"size": size/c*15},
                    cmap=sns.diverging_palette(220,15,n=100,as_cmap=True),
                    mask=mask)
        yticklabels = ax1.get_yticklabels()
        ax1.set_yticklabels(yticklabels, rotation=0, ha="right")
        ax1.set_xticklabels(ax1.get_xticklabels(), rotation=90)
        if title == True:
            ax1.set_title('%s Factor Correlations' % results.ID.split('_')[0].title(),
                      weight='bold', y=1.05, fontsize=size*3)
        ax1.tick_params(labelsize=size*3)
        # format cbar
        cbar_ax.tick_params(axis='y', length=0)
        cbar_ax.tick_params(labelsize=size*2)
        cbar_ax.set_ylabel('Pearson Correlation', rotation=-90, labelpad=size*4, fontsize=size*3)
    
    if plot_dir:
        filename = 'factor_correlations_EFA%s.%s' % (c, ext)
        save_figure(f, path.join(plot_dir, filename), 
                    {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close()
Exemplo n.º 16
0
def transfer_scores(data, results, rotate='oblimin'):
    """ calculates factor scores in a new dataset based on a reference results object """
    ref_data = results.data
    EFA = results.EFA
    c = EFA.results['num_factors']
    loadings = EFA.get_loading(c=c, rotate=rotate)
    # transform data
    positive_skewed = [i.replace('.logTr', '') for i in ref_data.columns if ".logTr" in i]
    negative_skewed = [i.replace('.ReflogTr', '') for i in ref_data.columns if ".ReflogTr" in i]
    DVs = [i.replace('.logTr','').replace('.ReflogTr','') for i in ref_data.columns]
    data = data.loc[:, DVs]
    data = transform_remove_skew(data,
                                 positive_skewed=positive_skewed,
                                 negative_skewed=negative_skewed)
    data = remove_outliers(data)
    data_imputed, error = missForest(data)
    subset = data_imputed.loc[:, loadings.index]
    scaled_data = scale(subset)
    # calculate scores
    weights = get_attr(EFA.results['factor_tree_Rout_%s' % rotate][c], 'weights')
    scores = pd.DataFrame(scaled_data.dot(weights),
                          index=data_imputed.index,
                          columns=loadings.columns)
    return scores
def calc_EFA_retest_held_out(results, rotate='oblimin', verbose=True):
    name = results.ID.split('_')[0].title()
    orig_data = results.data
    positive_skewed = [
        i.replace('.logTr', '') for i in orig_data.columns if ".logTr" in i
    ]
    negative_skewed = [
        i.replace('.ReflogTr', '') for i in orig_data.columns
        if ".ReflogTr" in i
    ]
    DVs = [
        i.replace('.logTr', '').replace('.ReflogTr', '')
        for i in orig_data.columns
    ]
    orig_scores = results.EFA.get_scores(rotate=rotate)

    # load and clean retest data exactly like original data
    data_raw = get_behav_data(dataset=results.dataset,
                              file='meaningful_variables.csv')
    retest_data_raw = get_behav_data(dataset=results.dataset.replace(
        'Complete', 'Retest'),
                                     file='meaningful_variables.csv')
    shared_ids = set(retest_data_raw.index) & set(data_raw.index)
    data_raw = data_raw.loc[shared_ids, :]
    retest_data_raw = retest_data_raw.loc[shared_ids, :]
    raw_data = {'T1': data_raw, 'T2': retest_data_raw}
    imputed_data = {}
    for name, data in raw_data.items():
        tmp_data = data.loc[:, DVs]
        tmp_data = transform_remove_skew(tmp_data,
                                         positive_skewed=positive_skewed,
                                         negative_skewed=negative_skewed)
        tmp_data = remove_outliers(tmp_data)
        tmp_data_imputed, error = missForest(tmp_data)
        scaled_tmp_data = scale(tmp_data_imputed)
        imputed_data[name] = scaled_tmp_data

    # get subjects not in the retest set
    ind_data = orig_data.loc[set(orig_data.index) - shared_ids]
    fa, output = psychFA(ind_data,
                         results.EFA.results['num_factors'],
                         method='ml',
                         rotate=rotate)
    weights = get_attr(fa, 'weights')
    scores = {}
    for name, data in imputed_data.items():
        suffix = ''
        if name == 'T2': suffix = 'T2'
        tmp_scores = pd.DataFrame(
            data.dot(weights),
            index=shared_ids,
            columns=[i + ' ' + suffix for i in orig_scores.columns])
        scores[name] = tmp_scores
    combined = pd.concat([scores['T1'], scores['T2']], axis=1)
    cross_diag = [
        combined.corr().iloc[i, i + len(orig_scores.columns)]
        for i in range(len(orig_scores.columns))
    ]
    # get ICCs
    ICCs = []
    for col in scores['T1'].columns:
        tmp = combined.filter(regex=col)
        out = psych.ICC(tmp)
        ICCs.append(list(out[0][1])[-1])
    return combined, cross_diag, ICCs, (fa, output)
def plot_cross_communality(all_results, rotate='oblimin', retest_threshold=.2,
                           size=4.6, dpi=300, ext='png', plot_dir=None):
    
    retest_data = None
    num_cols = 2
    num_rows = math.ceil(len(all_results.keys())/2)
    with sns.axes_style('white'):
        f, axes = plt.subplots(num_rows, num_cols, figsize=(size, size/2*num_rows))
    max_y = 0
    for i, (name, results) in enumerate(all_results.items()):
        if retest_data is None:
            # load retest data
            retest_data = get_retest_data(dataset=results.dataset.replace('Complete','Retest'))
            if retest_data is None:
                print('No retest data found for datafile: %s' % results.dataset)
        c = results.EFA.get_c()
        EFA = results.EFA
        loading = EFA.get_loading(c, rotate=rotate)
        # get communality from psych out
        fa = EFA.results['factor_tree_Rout_%s' % rotate][c]
        communality = get_attr(fa, 'communalities')
        communality = pd.Series(communality, index=loading.index)
        # alternative calculation
        #communality = (loading**2).sum(1).sort_values()
        communality.index = [i.replace('.logTr','') for i in communality.index]
        
        # reorder data in line with communality
        retest_subset= retest_data.loc[communality.index]
        # reformat variable names
        communality.index = format_variable_names(communality.index)
        retest_subset.index = format_variable_names(retest_subset.index)
        if len(retest_subset) > 0:
            # noise ceiling
            noise_ceiling = retest_subset.pearson
            # remove very low reliabilities
            if retest_threshold:
                noise_ceiling[noise_ceiling<retest_threshold]= np.nan
            # adjust
            adjusted_communality = communality/noise_ceiling
            
        # plot communality histogram
        if len(retest_subset) > 0:
            ax = axes[i]
            ax.set_title(name.title(), fontweight='bold', fontsize=size*2)
            colors = sns.color_palette(n_colors=2, desat=.75)
            sns.kdeplot(communality, linewidth=size/4, ax=ax, vertical=True,
                        shade=True, label='Communality', color=colors[0])
            sns.kdeplot(adjusted_communality, linewidth=size/4, ax=ax, vertical=True,
                        shade=True, label='Adjusted Communality', color=colors[1])
            xlim = ax.get_xlim()
            ax.hlines(np.mean(communality), xlim[0], xlim[1],
                      color=colors[0], linewidth=size/4, linestyle='--')
            ax.hlines(np.mean(adjusted_communality), xlim[0], xlim[1],
                      color=colors[1], linewidth=size/4, linestyle='--')
            ax.set_xticks([])
            ax.tick_params(labelsize=size*1.2)
            ax.set_ylim(0, ax.get_ylim()[1])
            ax.set_xlim(0, ax.get_xlim()[1])
            ax.spines['right'].set_visible(False)
            ax.spines['top'].set_visible(False)
            if (i+1) == len(all_results):
                ax.set_xlabel('Normalized Density', fontsize=size*2)
                leg=ax.legend(fontsize=size*1.5, loc='upper right',
                              bbox_to_anchor=(1.2, 1.0), 
                              handlelength=0, handletextpad=0)
                beautify_legend(leg, colors)
            elif i>=len(all_results)-2:
                ax.set_xlabel('Normalized Density', fontsize=size*2)
                ax.legend().set_visible(False)
            else:
                ax.legend().set_visible(False)
            if i%2==0:
                ax.set_ylabel('Communality', fontsize=size*2)
                ax.tick_params(labelleft=True, left=True, 
                               length=size/4, width=size/8)
            else:
                ax.tick_params(labelleft=False, left=True, 
                               length=0, width=size/8)
            # update max_x
            if ax.get_ylim()[1] > max_y:
                max_y = ax.get_ylim()[1]
            ax.grid(False)
            [i.set_linewidth(size*.1) for i in ax.spines.values()]
        for ax in axes:
            ax.set_ylim((0, max_y))
        plt.subplots_adjust(wspace=0)
                    
        if plot_dir:
            filename = 'communality_adjustment.%s' % ext
            save_figure(f, path.join(plot_dir, rotate, filename), 
                        {'bbox_inches': 'tight', 'dpi': dpi})
            plt.close()
 def _get_attr(self, attribute, c=None, rotate='oblimin'):
     if c is None:
         c = self.get_c()
         print('# of components not specified, using BIC determined #')
     return get_attr(self.results['factor_tree_Rout_%s' % rotate][c],
                     attribute)
Exemplo n.º 20
0
 def _get_attr(self, attribute, c=None, rotate='oblimin'):
     if c is None:
         c = self.get_c()
         print('# of components not specified, using BIC determined #')
     return get_attr(self.results['factor_tree_Rout_%s' % rotate][c],
                     attribute)
def plot_cross_communality(all_results,
                           rotate='oblimin',
                           retest_threshold=.2,
                           size=4.6,
                           dpi=300,
                           ext='png',
                           plot_dir=None):

    retest_data = None
    num_cols = 2
    num_rows = math.ceil(len(all_results.keys()) / 2)
    with sns.axes_style('white'):
        f, axes = plt.subplots(num_rows,
                               num_cols,
                               figsize=(size, size / 2 * num_rows))
    max_y = 0
    for i, (name, results) in enumerate(all_results.items()):
        if retest_data is None:
            # load retest data
            retest_data = get_retest_data(
                dataset=results.dataset.replace('Complete', 'Retest'))
            if retest_data is None:
                print('No retest data found for datafile: %s' %
                      results.dataset)
        c = results.EFA.get_c()
        EFA = results.EFA
        loading = EFA.get_loading(c, rotate=rotate)
        # get communality from psych out
        fa = EFA.results['factor_tree_Rout_%s' % rotate][c]
        communality = get_attr(fa, 'communalities')
        communality = pd.Series(communality, index=loading.index)
        # alternative calculation
        #communality = (loading**2).sum(1).sort_values()
        communality.index = [
            i.replace('.logTr', '') for i in communality.index
        ]

        # reorder data in line with communality
        retest_subset = retest_data.loc[communality.index]
        # reformat variable names
        communality.index = format_variable_names(communality.index)
        retest_subset.index = format_variable_names(retest_subset.index)
        if len(retest_subset) > 0:
            # noise ceiling
            noise_ceiling = retest_subset.pearson
            # remove very low reliabilities
            if retest_threshold:
                noise_ceiling[noise_ceiling < retest_threshold] = np.nan
            # adjust
            adjusted_communality = communality / noise_ceiling

        # plot communality histogram
        if len(retest_subset) > 0:
            ax = axes[i]
            ax.set_title(name.title(), fontweight='bold', fontsize=size * 2)
            colors = sns.color_palette(n_colors=2, desat=.75)
            sns.kdeplot(communality,
                        linewidth=size / 4,
                        ax=ax,
                        vertical=True,
                        shade=True,
                        label='Communality',
                        color=colors[0])
            sns.kdeplot(adjusted_communality,
                        linewidth=size / 4,
                        ax=ax,
                        vertical=True,
                        shade=True,
                        label='Adjusted Communality',
                        color=colors[1])
            xlim = ax.get_xlim()
            ax.hlines(np.mean(communality),
                      xlim[0],
                      xlim[1],
                      color=colors[0],
                      linewidth=size / 4,
                      linestyle='--')
            ax.hlines(np.mean(adjusted_communality),
                      xlim[0],
                      xlim[1],
                      color=colors[1],
                      linewidth=size / 4,
                      linestyle='--')
            ax.set_xticks([])
            ax.tick_params(labelsize=size * 1.2)
            ax.set_ylim(0, ax.get_ylim()[1])
            ax.set_xlim(0, ax.get_xlim()[1])
            ax.spines['right'].set_visible(False)
            ax.spines['top'].set_visible(False)
            if (i + 1) == len(all_results):
                ax.set_xlabel('Normalized Density', fontsize=size * 2)
                leg = ax.legend(fontsize=size * 1.5,
                                loc='upper right',
                                bbox_to_anchor=(1.2, 1.0),
                                handlelength=0,
                                handletextpad=0)
                beautify_legend(leg, colors)
            elif i >= len(all_results) - 2:
                ax.set_xlabel('Normalized Density', fontsize=size * 2)
                ax.legend().set_visible(False)
            else:
                ax.legend().set_visible(False)
            if i % 2 == 0:
                ax.set_ylabel('Communality', fontsize=size * 2)
                ax.tick_params(labelleft=True,
                               left=True,
                               length=size / 4,
                               width=size / 8)
            else:
                ax.tick_params(labelleft=False,
                               left=True,
                               length=0,
                               width=size / 8)
            # update max_x
            if ax.get_ylim()[1] > max_y:
                max_y = ax.get_ylim()[1]
            ax.grid(False)
            [i.set_linewidth(size * .1) for i in ax.spines.values()]
        for ax in axes:
            ax.set_ylim((0, max_y))
        plt.subplots_adjust(wspace=0)

        if plot_dir:
            filename = 'communality_adjustment.%s' % ext
            save_figure(f, path.join(plot_dir, rotate, filename), {
                'bbox_inches': 'tight',
                'dpi': dpi
            })
            plt.close()
Exemplo n.º 22
0
def plot_factor_correlation(results,
                            c,
                            rotate='oblimin',
                            title=True,
                            DA=False,
                            size=4.6,
                            dpi=300,
                            ext='png',
                            plot_dir=None):
    if DA:
        EFA = results.DA
    else:
        EFA = results.EFA
    loading = EFA.get_loading(c, rotate=rotate)
    # get factor correlation matrix
    reorder_vec = EFA.get_factor_reorder(c)
    phi = get_attr(EFA.results['factor_tree_Rout_%s' % rotate][c], 'Phi')
    phi = pd.DataFrame(phi, columns=loading.columns, index=loading.columns)
    phi = phi.iloc[reorder_vec, reorder_vec]
    mask = np.zeros_like(phi)
    mask[np.tril_indices_from(mask, -1)] = True
    with sns.plotting_context('notebook',
                              font_scale=2) and sns.axes_style('white'):
        f = plt.figure(figsize=(size * 5 / 4, size))
        ax1 = f.add_axes([0, 0, .9, .9])
        cbar_ax = f.add_axes([.91, .05, .03, .8])
        sns.heatmap(phi,
                    ax=ax1,
                    square=True,
                    vmax=1,
                    vmin=-1,
                    cbar_ax=cbar_ax,
                    cmap=sns.diverging_palette(220, 15, n=100, as_cmap=True))
        sns.heatmap(phi,
                    ax=ax1,
                    square=True,
                    vmax=1,
                    vmin=-1,
                    cbar_ax=cbar_ax,
                    annot=True,
                    annot_kws={"size": size / c * 15},
                    cmap=sns.diverging_palette(220, 15, n=100, as_cmap=True),
                    mask=mask)
        yticklabels = ax1.get_yticklabels()
        ax1.set_yticklabels(yticklabels, rotation=0, ha="right")
        ax1.set_xticklabels(ax1.get_xticklabels(), rotation=90)
        if title == True:
            ax1.set_title('%s Factor Correlations' %
                          results.ID.split('_')[0].title(),
                          weight='bold',
                          y=1.05,
                          fontsize=size * 3)
        ax1.tick_params(labelsize=size * 3)
        # format cbar
        cbar_ax.tick_params(axis='y', length=0)
        cbar_ax.tick_params(labelsize=size * 2)
        cbar_ax.set_ylabel('Pearson Correlation',
                           rotation=-90,
                           labelpad=size * 4,
                           fontsize=size * 3)

    if plot_dir:
        filename = 'factor_correlations_EFA%s.%s' % (c, ext)
        save_figure(f, path.join(plot_dir, filename), {
            'bbox_inches': 'tight',
            'dpi': dpi
        })
        plt.close()