import numpy as np
import pandas as pd
import scipy as sp
import scipy.stats  # makes sp.stats available on older SciPy
from pingouin import partial_corr


def edgewise_pcorr(self, train_subs, fold, perm, method='pearson'):
    # Appending to a list and then building one DataFrame is substantially
    # faster than appending to a DataFrame row by row.
    corr_dfs = []
    # Template result for all-zero edges, mirroring Pingouin's output format
    empty_df = pd.DataFrame({'r': {'pearson': np.nan},
                             'p-val': {'pearson': np.nan}})
    train_data = self.data['data'].loc[train_subs]
    train_data.columns = train_data.columns.astype(str)
    N = len(self.data['edges'])
    n = 1
    percent = round((n / N) * 100)
    for edge in self.data['edges']:  # edge-wise correlation
        # All-zero columns would raise a ValueError; testing up front is
        # _way_ faster than try/except.
        if (train_data[edge] != 0).any():
            if perm >= 0:
                y = "{}-perm-{}".format(self.data['behav'], perm)
            else:
                y = self.data['behav']
            if self.data['covars']:
                # Taking only the necessary columns speeds this up a few %
                pcorr = partial_corr(data=train_data, x=edge, y=y,
                                     covar=self.data['covars'],
                                     method=method)[['r', 'p-val']]
                pcorr['covars'] = True  # debug, remove later
            else:
                # We could also use pcorr from Pingouin on the entire df, but
                # that is a prohibitively memory-intensive operation;
                # edge-wise like this works just fine. This branch was
                # introduced to cross-check implausibly good results from
                # Pingouin's partial_corr: with no covariates we can use
                # SciPy's implementation of Pearson's r instead.
                pcorr = empty_df.copy()
                # Reproduce Pingouin's output format for unified downstream
                # processing
                pcorr[['r', 'p-val']] = sp.stats.pearsonr(
                    train_data.loc[:, edge], train_data.loc[:, y])
                pcorr['covars'] = False  # debug, remove later
        else:
            pcorr = empty_df
        corr_dfs.append(pcorr)
        percent_new = round((n / N) * 100)
        if perm >= 0:
            fold_msg = "{} of permutation {}".format(fold + 1, perm + 1)
        else:
            fold_msg = fold + 1
        if percent_new > percent:
            self.status_update(
                "Computing fold {} ({} %)...".format(fold_msg, percent_new))
            percent = percent_new
        n += 1
    self.status_update("Assembling data frame...")
    # Assembling the DataFrame before .put() avoids awfully slow pickling of
    # data through the queue; it is orders of magnitude faster this way.
    combined_corr_dfs = pd.concat(corr_dfs)
    return combined_corr_dfs
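# A minimal sketch (not from the original source, synthetic data only)
# illustrating the performance claim in the comments above: collecting the
# per-edge results in a list and concatenating once is far faster than
# growing a DataFrame inside the loop.
import time

import numpy as np
import pandas as pd

parts = [pd.DataFrame({'r': [np.random.rand()]}) for _ in range(5000)]

t0 = time.perf_counter()
combined = pd.concat(parts)  # single concat, one allocation pass
print("list + single concat:", time.perf_counter() - t0)

t0 = time.perf_counter()
grown = pd.DataFrame()
for part in parts:
    grown = pd.concat([grown, part])  # re-copies everything each iteration
print("incremental concat:", time.perf_counter() - t0)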
import math

import pandas as pd
from pingouin import partial_corr


def calculate_partial_correlation(self):
    partial_correlations_list = []
    data = pd.DataFrame(self.shap_values, columns=self.sensor_names)
    data["RUL"] = testing_RULs  # assumed to be defined at module scope
    for sensor_name1 in self.sensor_names:
        # Control for all other sensors on the y side (semi-partial correlation)
        covariates = [s for s in self.sensor_names if s != sensor_name1]
        res = math.fabs(
            partial_corr(data=data, x=sensor_name1, y='RUL',
                         y_covar=covariates,
                         method='pearson')['r'].iloc[0])
        partial_correlations_list.append(res)
    print("Partial correl", partial_correlations_list)
    self.correlate_indicators(partial_correlations_list)
import numpy as np
import pandas as pd
import pingouin as pg


def calculateBrainBehaviorCorrelations(subjects, TRs, brainRegions):
    global ling1D, analog1D, presentedImages
    # Create multi-index over subject x TR x behavioral measure. Note: the
    # original arrays had mismatched lengths (behavioralInd was 3x longer
    # than the others, so zip silently truncated it); each subject/TR pair
    # must be repeated once per behavioral measure.
    subjectsInd = np.repeat(subjects, len(TRs) * 3)
    TRsInd = np.tile(np.repeat(TRs, 3), len(subjects))
    behavioralInd = np.tile(['analog', 'ling', 'partial'],
                            len(TRs) * len(subjects))
    arrays = [subjectsInd, TRsInd, behavioralInd]
    tuples = list(zip(*arrays))
    index = pd.MultiIndex.from_tuples(
        tuples, names=["subjects", "TRs", "behavioralMeasure"])
    brainCorr = pd.DataFrame(columns=brainRegions, index=index)
    # Behavioral model vectors shared by every region (fix: dfPartial was
    # never initialized in the original)
    dfPartial = pd.DataFrame({'analog': analog1D, 'ling': ling1D})
    for subject in subjects:
        print(subject)
        for TR in TRs:
            print(TR)
            subjMat, stimList = loadSubjectBrainData(subject, TR)
            for region in brainRegions:
                regionMat = pd.DataFrame(subjMat[region].T,
                                         columns=stimList.iloc[:, 0])
                regionMat = regionMat[regionMat.columns.intersection(
                    presentedImages.iloc[:, 0])]
                regionCorr = regionMat.corr()
                regionCorr.sort_index(inplace=True, axis=0)
                regionCorr.sort_index(inplace=True, axis=1)
                regionCorrNP = regionCorr.to_numpy()
                regionCorr1D = np.reshape(regionCorrNP, regionCorrNP.size)
                dfPartial['brain'] = regionCorr1D
                brainCorr.at[(subject, TR, 'analog'), region] = np.corrcoef(
                    analog1D, regionCorr1D)[0, 1]
                brainCorr.at[(subject, TR, 'ling'), region] = np.corrcoef(
                    ling1D, regionCorr1D)[0, 1]
                brainCorr.at[(subject, TR, 'partial'), region] = pg.partial_corr(
                    data=dfPartial, x='analog', y='brain',
                    covar='ling').round(3).r.pearson
    return brainCorr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pingouin as pg
import seaborn as sb


def correlate(self, x_list=None, y_list=None, c_list=None, method="spearman",
              figfmt="png", skip=False):
    """Correlate variables, controlling for covariates."""
    if skip:
        return self
    x_list = self.phtp_list if x_list is None else x_list
    y_list = self.phtp_list if y_list is None else y_list
    c_list = self.cvrt_list if c_list is None else c_list
    x_len, y_len = len(x_list), len(y_list)
    pcorr_mtrx_np = np.eye(x_len, y_len)
    for idx_x, pntp_x in enumerate(x_list):
        for idx_y, pntp_y in enumerate(y_list):
            if pntp_y != pntp_x:
                try:
                    # Fix: use the resolved c_list instead of self.cvrt_list,
                    # so a caller-supplied covariate list is actually used
                    _pcorr_dtfm = pg.partial_corr(self.dataframe, pntp_x,
                                                  pntp_y, c_list,
                                                  method=method)
                    pcorr_mtrx_np[idx_x, idx_y] = _pcorr_dtfm['r'].iloc[0]
                except AssertionError:
                    pass
    self.pcor_dtfm = pd.DataFrame(pcorr_mtrx_np, index=self.phtp_list,
                                  columns=self.phtp_list)
    pcorr_htmp_name = ".".join(
        [self.output_prefix, "correlation_heatmap", figfmt])
    ctmp_grid = sb.clustermap(self.pcor_dtfm, col_cluster=True,
                              row_cluster=True, cmap="Greens")
    ctmp_grid.fig.savefig(pcorr_htmp_name)
    plt.cla()
    plt.clf()
    plt.close()
    return self
import os.path as op

import numpy as np
import pandas as pd
import pingouin as pg
import scipy.spatial.distance


def partial_corr_brainxdcnn(conv_out=0):
    layers_interest = list(range(1, nb_layers))
    layers_interest = np.delete(layers_interest, [conv_out])
    partialKendall_timec_df = pd.DataFrame()
    partial_pval_df = pd.DataFrame()
    partial_corr_timecourse = dict()
    # Correlate brain and DCNN RDMs
    for lay in layers_interest:
        layer_name = model.layers[lay].name  # e.g. block1_conv1
        print(f"{layer_name}")
        # Load the layer's RDM to correlate with the human brain RDMs
        corr_rdms_df = pd.read_pickle(
            op.join(output_layer_rdms_dir, f'rdm_{layer_name}.pkl'))
        corr_rdms_array = normalise_dist(
            scipy.spatial.distance.squareform(1 - (np.asarray(corr_rdms_df))))
        partialK_timecourse = []
        partialK_pvals_timecourse = []
        partial_corr_timecourse[layer_name] = {}
        for this_slice in range(len(times)):
            coeffs_temp = pg.partial_corr(data=big_df,
                                          x=f"{layer_name}",
                                          y=f"{times[this_slice].round(3)}",
                                          covar=[layers_names[conv_out]],
                                          method='kendall')
            partialK_timecourse.append(coeffs_temp.r.kendall)
            partialK_pvals_timecourse.append(coeffs_temp['p-val'].kendall)
        temp_df = pd.DataFrame({f'{layer_name}': partialK_timecourse})
        temp_val_df = pd.DataFrame({f'{layer_name}': partialK_pvals_timecourse})
        partialKendall_timec_df = pd.concat([partialKendall_timec_df, temp_df],
                                            axis=1)
        partial_pval_df = pd.concat([partial_pval_df, temp_val_df], axis=1)
    return partialKendall_timec_df, partial_pval_df
import numpy as np
import pandas as pd
import pingouin as pg


def partial_corr(C, method='pearson'):
    """Pairwise partial correlations between all columns of C, each pair
    controlling for every other column. Returns (r matrix, p-value matrix)."""
    p = C.shape[1]
    index = C.columns.tolist()
    # np.float was removed in NumPy 1.24; use the builtin float instead
    P_corr = np.zeros((p, p), dtype=float)
    P_pval = np.zeros((p, p), dtype=float)
    for i in range(p):
        P_corr[i, i] = 1
        P_pval[i, i] = 1
        for j in range(i + 1, p):
            x, y = index[i], index[j]
            res = pg.partial_corr(data=C, x=x, y=y,
                                  covar=list(set(index) - set([x, y])),
                                  method=method)
            corr, pval = res['r'].iloc[0], res['p-val'].iloc[0]
            P_corr[i, j] = corr
            P_corr[j, i] = corr
            P_pval[i, j] = pval
            P_pval[j, i] = pval
    P_corr = pd.DataFrame(P_corr, index=index, columns=index)
    P_pval = pd.DataFrame(P_pval, index=index, columns=index)
    return (P_corr, P_pval)
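# Hedged usage sketch for the helper above, on synthetic data (names here are
# illustrative, not from the original source). With method='pearson' the r
# matrix should agree with pingouin's pandas accessor df.pcorr(), which also
# controls each pair for all remaining columns.
rng = np.random.default_rng(0)
df_demo = pd.DataFrame(rng.normal(size=(200, 4)), columns=list('ABCD'))
P_corr_demo, P_pval_demo = partial_corr(df_demo, method='pearson')
print(P_corr_demo.round(3))
print(df_demo.pcorr().round(3))  # cross-check against pingouin's accessor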
def test_pandas(self):
    """Test pandas method."""
    # Test the ANOVA (Pandas)
    aov = df.anova(dv='Scores', between='Group', detailed=True)
    assert aov.equals(
        pg.anova(dv='Scores', between='Group', detailed=True, data=df))
    # Test the Welch ANOVA (Pandas)
    aov = df.welch_anova(dv='Scores', between='Group')
    assert aov.equals(pg.welch_anova(dv='Scores', between='Group', data=df))
    # Test the repeated measures ANOVA (Pandas)
    aov = df.rm_anova(dv='Scores', within='Time', subject='Subject',
                      detailed=True)
    assert aov.equals(
        pg.rm_anova(dv='Scores', within='Time', subject='Subject',
                    detailed=True, data=df))
    # FDR-corrected post hocs with Hedges' g effect size
    ttests = df.pairwise_ttests(dv='Scores', within='Time', subject='Subject',
                                padjust='fdr_bh', effsize='hedges')
    assert ttests.equals(
        pg.pairwise_ttests(dv='Scores', within='Time', subject='Subject',
                           padjust='fdr_bh', effsize='hedges', data=df))
    # Test two-way mixed ANOVA
    aov = df.mixed_anova(dv='Scores', between='Group', within='Time',
                         subject='Subject', correction=False)
    assert aov.equals(
        pg.mixed_anova(dv='Scores', between='Group', within='Time',
                       subject='Subject', correction=False, data=df))
    # Test pairwise correlations
    corrs = data.pairwise_corr(columns=['X', 'M', 'Y'], method='spearman')
    corrs2 = pg.pairwise_corr(data=data, columns=['X', 'M', 'Y'],
                              method='spearman')
    assert corrs['r'].equals(corrs2['r'])
    # Test partial correlation
    corrs = data.partial_corr(x='X', y='Y', covar='M', method='spearman')
    corrs2 = pg.partial_corr(x='X', y='Y', covar='M', method='spearman',
                             data=data)
    assert corrs['r'].equals(corrs2['r'])
    # Test partial correlation matrix (compare with the ppcor package)
    corrs = data.pcorr().round(3)
    np.testing.assert_array_equal(corrs.iloc[0, :].values,
                                  [1, 0.392, 0.06, -0.014, -0.149])
    # Now compare against Pingouin's own partial_corr function
    corrs = data[['X', 'Y', 'M']].pcorr()
    corrs2 = data.partial_corr(x='X', y='Y', covar='M')
    assert round(corrs.loc['X', 'Y'], 3) == corrs2.loc['pearson', 'r']
    # Test mediation analysis
    med = data.mediation_analysis(x='X', m='M', y='Y', seed=42, n_boot=500)
    np.testing.assert_array_equal(med.loc[:, 'coef'].values,
                                  [0.5610, 0.6542, 0.3961, 0.0396, 0.3565])
ax.set_title(label='DepMap Screening in CCLE cell lines',
             fontdict={'fontweight': 'bold', 'fontsize': 14})
# plt.show()
plt.tight_layout()
fig.savefig('_distribution.pdf')
plt.close()

# Calculate the partial correlation of every other gene with the given gene,
# adjusted for tissue type
corr_res = {}
for g in depmap_screening.columns.tolist():
    if g == gene_name:
        continue
    df = pd.concat([depmap_screening[[gene_name, g]].astype('float'),
                    tmp.astype('int')], axis=1).dropna()
    try:
        corr_res[g] = pg.partial_corr(
            df.astype('float'), x=g, y=gene_name,
            covar=tmp.columns.tolist()).loc['pearson', ['r', 'p-val']]
    except Exception:
        continue
corr_res = pd.DataFrame.from_dict(corr_res, orient='index')
corr_res.columns = ['pcorr', 'ppval']
corr_res = corr_res.sort_values('pcorr', ascending=False)
corr_res.to_csv('DepMap_screen_partial_corr_adjByTissueType.csv')
# Partial correlation
import pandas as pd
import pingouin as pg

data = pd.read_csv('/home/atrides/Desktop/R/statistics_with_Python/06_Correlation/Data_Files/Exam Anxiety.dat',
                   sep='\t')
data = data[['Revise', 'Exam', 'Anxiety']]
print(data.head())

print(data.pcorr())  # partial correlation matrix via pingouin's pandas accessor
print(pg.partial_corr(data=data, x='Exam', y='Anxiety', covar='Revise'))

# Semi-partial correlation
print(pg.partial_corr(data=data, x='Exam', y='Anxiety', x_covar='Revise'))

# Partial correlation quantifies the relationship between two variables while
# controlling for the effects of a third variable on both variables in the
# original correlation.
# Semi-partial correlation quantifies the relationship between two variables
# while controlling for the effects of a third variable on only one of the
# variables in the original correlation.
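# A minimal sketch (synthetic data, not from the original script) showing that
# these two calls match the classic residual-based definitions: partial
# correlation correlates the residuals of both variables after regressing out
# the covariate, while semi-partial residualizes only x.
import numpy as np

rng = np.random.default_rng(42)
z = rng.normal(size=300)
x_ = 0.6 * z + rng.normal(size=300)
y_ = 0.4 * z + 0.3 * x_ + rng.normal(size=300)
demo = pd.DataFrame({'x': x_, 'y': y_, 'z': z})

def _residuals(a, b):
    # Residuals of an ordinary least-squares fit of a on b (with intercept)
    X = np.column_stack([np.ones_like(b), b])
    beta, *_ = np.linalg.lstsq(X, a, rcond=None)
    return a - X @ beta

# Partial: remove z from both variables, then correlate the residuals
print(np.corrcoef(_residuals(x_, z), _residuals(y_, z))[0, 1])
print(pg.partial_corr(demo, x='x', y='y', covar='z').at['pearson', 'r'])

# Semi-partial: remove z from x only, correlate with raw y
print(np.corrcoef(_residuals(x_, z), y_)[0, 1])
print(pg.partial_corr(demo, x='x', y='y', x_covar='z').at['pearson', 'r'])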
import pandas as pd
import pingouin as pg

if __name__ == '__main__':
    PATH = "../../data/ms1_encircle/"
    DATA = "ms1_encircle_corr.xlsx"
    data = pd.read_excel(PATH + DATA)

    # partial correlation parameters
    x = "deviation_score_mean"
    y = "groups_mean"
    covar = "numerosity"
    method = "pearson"

    # TODO: pick the alignment condition
    # alignmentcon = "radial"
    alignmentcon = "tangential"
    winsize = 0.7
    data_to_analysis = data[(data["winsize"] == winsize) &
                            (data["crowdingcons"] == alignmentcon)]
    # data_to_analysis = data[(data["crowdingcons"] == alignmentcon)]
    partial_corr = pg.partial_corr(data_to_analysis, x=x, y=y, covar=covar,
                                   method=method)
def test_pandas(self):
    """Test pandas method."""
    # Test the ANOVA (Pandas)
    aov = df.anova(dv='Scores', between='Group', detailed=True)
    assert aov.equals(
        pg.anova(dv='Scores', between='Group', detailed=True, data=df))
    aov3_ss1 = df_aov3.anova(dv='Cholesterol', between=['Sex', 'Drug'],
                             ss_type=1)
    aov3_ss2 = df_aov3.anova(dv='Cholesterol', between=['Sex', 'Drug'],
                             ss_type=2)
    aov3_ss2_pg = pg.anova(dv='Cholesterol', between=['Sex', 'Drug'],
                           data=df_aov3, ss_type=2)
    assert not aov3_ss1.equals(aov3_ss2)
    assert aov3_ss2.round(3).equals(aov3_ss2_pg.round(3))
    # Test the Welch ANOVA (Pandas)
    aov = df.welch_anova(dv='Scores', between='Group')
    assert aov.equals(pg.welch_anova(dv='Scores', between='Group', data=df))
    # Test the ANCOVA
    aov = df_anc.ancova(dv='Scores', covar='Income', between='Method').round(3)
    assert aov.equals(
        pg.ancova(data=df_anc, dv='Scores', covar='Income',
                  between='Method').round(3))
    # Test the repeated measures ANOVA (Pandas)
    aov = df.rm_anova(dv='Scores', within='Time', subject='Subject',
                      detailed=True)
    assert aov.equals(
        pg.rm_anova(dv='Scores', within='Time', subject='Subject',
                    detailed=True, data=df))
    # FDR-corrected post hocs with Hedges' g effect size
    ttests = df.pairwise_tests(dv='Scores', within='Time', subject='Subject',
                               padjust='fdr_bh', effsize='hedges')
    assert ttests.equals(
        pg.pairwise_tests(dv='Scores', within='Time', subject='Subject',
                          padjust='fdr_bh', effsize='hedges', data=df))
    # Pairwise Tukey
    tukey = df.pairwise_tukey(dv='Scores', between='Group')
    assert tukey.equals(
        pg.pairwise_tukey(data=df, dv='Scores', between='Group'))
    # Test two-way mixed ANOVA
    aov = df.mixed_anova(dv='Scores', between='Group', within='Time',
                         subject='Subject', correction=False)
    assert aov.equals(
        pg.mixed_anova(dv='Scores', between='Group', within='Time',
                       subject='Subject', correction=False, data=df))
    # Test pairwise correlations
    corrs = data.pairwise_corr(columns=['X', 'M', 'Y'], method='spearman')
    corrs2 = pg.pairwise_corr(data=data, columns=['X', 'M', 'Y'],
                              method='spearman')
    assert corrs['r'].equals(corrs2['r'])
    # Test partial correlation
    corrs = data.partial_corr(x='X', y='Y', covar='M', method='spearman')
    corrs2 = pg.partial_corr(x='X', y='Y', covar='M', method='spearman',
                             data=data)
    assert corrs['r'].equals(corrs2['r'])
    # Test partial correlation matrix (compare with the ppcor package)
    corrs = data.iloc[:, :5].pcorr().round(3)
    np.testing.assert_array_equal(corrs.iloc[0, :].to_numpy(),
                                  [1, 0.392, 0.06, -0.014, -0.149])
    # Now compare against Pingouin's own partial_corr function
    corrs = data[['X', 'Y', 'M']].pcorr()
    corrs2 = data.partial_corr(x='X', y='Y', covar='M')
    assert np.isclose(corrs.at['X', 'Y'], corrs2.at['pearson', 'r'])
    # Test rcorr (correlation matrix with p-values)
    # We compare against Pingouin's pairwise_corr function
    corrs = df_corr.rcorr(padjust='holm', decimals=4)
    corrs2 = df_corr.pairwise_corr(padjust='holm').round(4)
    assert corrs.at['Neuroticism', 'Agreeableness'] == '*'
    assert corrs.at['Agreeableness', 'Neuroticism'] == str(corrs2.at[2, 'r'])
    corrs = df_corr.rcorr(padjust='holm', stars=False, decimals=4)
    assert (corrs.at['Neuroticism', 'Agreeableness'] ==
            str(corrs2.at[2, 'p-corr'].round(4)))
    corrs = df_corr.rcorr(upper='n', decimals=5)
    corrs2 = df_corr.pairwise_corr().round(5)
    assert corrs.at['Extraversion', 'Openness'] == corrs2.at[4, 'n']
    assert corrs.at['Openness', 'Extraversion'] == str(corrs2.at[4, 'r'])
    # Method = spearman does not work with Python 3.5 on Travis; it seems to
    # return the Pearson correlation instead!
    df_corr.rcorr(method='spearman')
    df_corr.rcorr()
    # Test mediation analysis
    med = data.mediation_analysis(x='X', m='M', y='Y', seed=42, n_boot=500)
    np.testing.assert_array_equal(med.loc[:, 'coef'].round(4).to_numpy(),
                                  [0.5610, 0.6542, 0.3961, 0.0396, 0.3565])
tisDatSjTpm1.loc[
    tisDatSjTpm1['intronNum_lgstTpt'] > 0, tisName + '_brPerAnnIn'] = (
        tisDatSjTpm1.loc[tisDatSjTpm1['intronNum_lgstTpt'] > 0, tisName + '_br'] /
        tisDatSjTpm1.loc[tisDatSjTpm1['intronNum_lgstTpt'] > 0, 'intronNum_lgstTpt'])
tisDatSjTpm1.loc[
    tisDatSjTpm1[tisName + '_lsjN'] > 0, tisName + '_brPerObIn'] = (
        tisDatSjTpm1.loc[tisDatSjTpm1[tisName + '_lsjN'] > 0, tisName + '_br'] /
        tisDatSjTpm1.loc[tisDatSjTpm1[tisName + '_lsjN'] > 0, tisName + '_lsjN'])

result1 = (
    [len(tisDatSjTpm1),
     sum(tisDatSjTpm1[tisName + '_totSj']),
     sum(tisDatSjTpm1[tisName + '_totLsj']),
     sum(tisDatSjTpm1[tisName + '_totBsj']),
     sum(tisDatSjTpm1[tisName + '_totBsj']) / sum(tisDatSjTpm1[tisName + '_totSj'])]
    + list(spearmanr(tisDatSjTpm1[tisName + '_totSj'],
                     tisDatSjTpm1[tisName + '_br']))
    + list(spearmanr(tisDatSjTpm1.loc[tisDatSjTpm1['intronNum_lgstTpt'] > 0, tisName + '_totSj'],
                     tisDatSjTpm1.loc[tisDatSjTpm1['intronNum_lgstTpt'] > 0, tisName + '_brPerAnnIn']))
    + list(spearmanr(tisDatSjTpm1.loc[tisDatSjTpm1[tisName + '_lsjN'] > 0, tisName + '_totSj'],
                     tisDatSjTpm1.loc[tisDatSjTpm1[tisName + '_lsjN'] > 0, tisName + '_brPerObIn']))
    + list(partial_corr(data=tisDatSjTpm1, x=tisName + '_totSj', y=tisName + '_br',
                        covar='intronNum_lgstTpt', method='spearman').iloc[0, [1, 5]])
    + list(partial_corr(data=tisDatSjTpm1, x=tisName + '_totSj', y=tisName + '_br',
                        covar=tisName + '_lsjN', method='spearman').iloc[0, [1, 5]])
    + list(spearmanr(tisDatSjTpm1[tisName + '_allTpm'],
                     tisDatSjTpm1[tisName + '_br']))
    + list(spearmanr(tisDatSjTpm1.loc[tisDatSjTpm1['intronNum_lgstTpt'] > 0, tisName + '_allTpm'],
                     tisDatSjTpm1.loc[tisDatSjTpm1['intronNum_lgstTpt'] > 0, tisName + '_brPerAnnIn']))
    + list(spearmanr(tisDatSjTpm1.loc[tisDatSjTpm1[tisName + '_lsjN'] > 0, tisName + '_allTpm'],
                     tisDatSjTpm1.loc[tisDatSjTpm1[tisName + '_lsjN'] > 0, tisName + '_brPerObIn']))
    + list(partial_corr(data=tisDatSjTpm1, x=tisName + '_allTpm', y=tisName + '_br',
                        covar='intronNum_lgstTpt', method='spearman').iloc[0, [1, 5]])
    + list(partial_corr(data=tisDatSjTpm1, x=tisName + '_allTpm', y=tisName + '_br',
                        covar=tisName + '_lsjN', method='spearman').iloc[0, [1, 5]])
    + list(spearmanr(tisDatSjTpm1['intronNum_lgstTpt'],
                     tisDatSjTpm1[tisName + '_br']))
    + list(spearmanr(tisDatSjTpm1.loc[tisDatSjTpm1['intronNum_lgstTpt'] > 0, 'intronNum_lgstTpt'],
                     tisDatSjTpm1.loc[tisDatSjTpm1['intronNum_lgstTpt'] > 0, tisName + '_brPerAnnIn']))
    + list(spearmanr(tisDatSjTpm1[tisName + '_lsjN'],
                     tisDatSjTpm1[tisName + '_br']))
    + list(spearmanr(tisDatSjTpm1.loc[tisDatSjTpm1[tisName + '_lsjN'] > 0, tisName + '_lsjN'],
                     tisDatSjTpm1.loc[tisDatSjTpm1[tisName + '_lsjN'] > 0, tisName + '_brPerObIn'])))

tisDatSjTpm1.to_csv(sps[i] + '_' + tisName + '_geneWithSjTpm1.xls', sep="\t",
                    index=False, na_rep='NA')
VIF["VIF"] = [ variance_inflation_factor(independentpd.values, i) for i in range(independentpd.shape[1]) ] print(VIF) independentpd = independentpd.drop(columns=['Year', 'waryn']) independent = independentpd.to_numpy() X_train, X_test, Y_train, y_test = train_test_split(independent, dependent, test_size=0.2, random_state=0) spearman = partial_corr(data=aganalyze, x='warchange', y='Change', x_covar=['warno', 'Area_cat', 'waryn'], y_covar=['Item_cat', 'Area_cat'], method='spearman') spearman = partial_corr(data=aganalyze, x='warno', y='Change', x_covar=['warchange', 'Area_cat', 'waryn'], y_covar=['Item_cat', 'Area_cat'], method='spearman') residtestmodel = LinearRegression().fit(X_train, Y_train) residpredict = residtestmodel.predict(X_test) residtest = pd.DataFrame() plotresidpredict = pd.DataFrame(residpredict) plottest = pd.DataFrame(y_test) residtest['resid'] = (plottest[0] - plotresidpredict[0])
import numpy as np
import pandas as pd
import pingouin as pg


def ExploreIdea2():
    [a, b, cP, Sa, Sb, ScP, IE, SIE, data] = CalculateSimulatedEffectSizes(
        1000, 0.5, 0.5, 0.5, 99)
    # Use a list, not a set, for the column names: a set has no defined
    # order, so the labels would be assigned to columns nondeterministically.
    df = pd.DataFrame(data, columns=['C', 'A', 'B'])
    print(np.corrcoef(data.T))
    print(pg.partial_corr(data=df, x='A', y='C', covar='B').round(3))
# let's convert these data to a pandas frame
df = pd.DataFrame()
df['x1'] = x1
df['x2'] = x2
df['x3'] = x3

# compute the "raw" correlation matrix
cormatR = df.corr()
print(cormatR)

# print out one value
print(' ')
print(cormatR.values[1, 0])

# partial correlation
pc = pg.partial_corr(df, x='x3', y='x2', covar='x1')
print(' ')
print(pc)


# In[ ]:


## visualize the matrices
fig, ax = plt.subplots(1, 2, figsize=(6, 3))

# raw correlations
ax[0].imshow(cormatR.values, vmin=-1, vmax=1)
ax[0].set_xticks(range(3))
ax[0].set_yticks(range(3))

# add text
# add color codes for crowding and no-crowding displays
insert_new_col(my_data, "crowdingcons", 'colorcode',
               add_color_code_by_crowdingcons)
# color coded
insert_new_col_from_two_cols(my_data, "N_disk", "crowdingcons",
                             "colorcode5levels", add_color_code_5levels)

# %% correlations
my_data = get_analysis_dataframe(my_data, crowding=crowdingcons)

# data for each winsize
df_list_beforegb = [get_sub_df_according2col_value(my_data, "winsize", winsize)
                    for winsize in winsize_list]
df_list = [get_data_to_analysis(df, "deviation_score", "count_number",
                                "N_disk", "list_index", "colorcode",
                                "colorcode5levels")
           for df in df_list_beforegb]

# partial corr parameters
method = "pearson"
x = "count_number"
y = "deviation_score"
covar = "N_disk"
partial_corr_res_list = [pg.partial_corr(df, x=x, y=y, covar=covar,
                                         method=method)
                         for df in df_list]

# %% normalization
df_list_norm_deviation = [normalize_deviation(df) for df in df_list]
df_list_norm_countn = [normalize_zerotoone(df, to_normalize_col="count_number")
                       for df in df_list]

# rename normalized columns
old_name_dev = "deviation_score"
new_name_dev = "deviation_score_norm"
old_name_countn = "count_number"
new_name_countn = "count_number_norm"
df_list_norm_deviation = [rename_norm_col(df, old_name_dev, new_name_dev)
                          for df in df_list_norm_deviation]
df_list_norm_countn = [rename_norm_col(df, old_name_countn, new_name_countn)
                       for df in df_list_norm_countn]

# concatenate the original dataframe with the new normalized dataframes
df_list = [pd.concat([df, df_list_norm_deviation[index],
                      df_list_norm_countn[index]], axis=1)
           for index, df in enumerate(df_list)]

# %% calculate residuals (to plot)
[calculate_residuals(df) for df in df_list]
data = gen_test(500)
data.iloc[5:60, :] = np.nan
x = np.ma.masked_where(np.isnan(data['x'].values), data['x'].values).reshape(25, 20)
y = np.ma.masked_where(np.isnan(data['y'].values), data['y'].values).reshape(25, 20)
c1 = np.ma.masked_where(np.isnan(data['c1'].values), data['c1'].values).reshape(25, 20)
c2 = np.ma.masked_where(np.isnan(data['c2'].values), data['c2'].values).reshape(25, 20)

# ----- DEBUG restore shape
# x0, x, y0, y = partial_corr_tensor(x, y, [c1, c2])
# print(x0.data, x.data)
# print(y0.data, y.data)

# ----- DEBUG the actual partial correlation
which = 6
stats = pg.partial_corr(data.iloc[which::20, :].dropna(axis=1, how='all'),
                        x='x', y='y', covar=['c1', 'c2'])
print(stats)

# Tensor-calculate the partial correlation
r, p = partial_corr_tensor(x, y, [c1, c2])
print(r[which])
print(p[which])
print(r)
print(p)
for v in EV:
    fib = hf[hf.event == b]['log' + f].dropna()
    evar = hf[hf.event == b][v].reindex(fib.index)

    # Full correlation
    pcc = stats.pearsonr(fib, evar)
    temp.loc[v, (b, 'PCC_' + f)] = round(pcc[0], 2)
    temp.loc[v, (b, 'p_' + f)] = pcc[1]
    if pcc[1] <= alpha:
        sig_list.append(v)

    # Partial: control for all other event variables on both sides
    confound = [x for x in EV if x != v]
    partial = pg.partial_corr(data=hf[hf.event == b], x='log' + f, y=v,
                              covar=confound)
    temp.loc[v, (b, 'partial_' + f)] = round(partial['r'].iloc[0], 2)
    temp.loc[v, (b, 'p_partial_' + f)] = partial['p-val'].iloc[0]
    if partial['p-val'].iloc[0] <= alpha:
        sig_list.append('*')

    # Semi-partial: control for the other variables on x only
    partial = pg.partial_corr(data=hf[hf.event == b], x='log' + f, y=v,
                              x_covar=confound)
    temp.loc[v, (b, 'semi_' + f)] = round(partial['r'].iloc[0], 2)
    temp.loc[v, (b, 'p_semi_' + f)] = partial['p-val'].iloc[0]
my_data_list = [
    get_sub_df_according2col_value(my_data, "winsize", ws) for ws in winsize
]

# data to calculate partial corr
my_data_list2analysis = [
    get_data_to_analysis(data, "deviation_score", alignment[indx_align_n],
                         "N_disk", "list_index", "colorcode",
                         "colorcode5levels")
    for data in my_data_list
]

# partial corr between deviation score and alignment scores
method = "pearson"
partial_corr_list = [
    pg.partial_corr(data, x="deviation_score", y=alignment[indx_align_n],
                    covar="N_disk", method=method)
    for data in my_data_list2analysis
]

# see correlations
if cal_pearsonr:
    pearson_r = [
        stats.pearsonr(data["deviation_score"], data[alignment[indx_align_n]])
        for data in my_data_list2analysis
    ]
    pearson_r2 = [
        stats.pearsonr(data["N_disk"], data[alignment[indx_align_n]])
        for data in my_data_list2analysis
    ]
    pearson_r3 = [
mm = 0
for ifmri in range(cons - 1):
    for jfmri in range(cons - 1 - ifmri):
        v2_fmriRDM[mm] = fmri_rdms[ifmri, ifmri + jfmri + 1]
        mm = mm + 1

# sort data into the expected format
data = pd.DataFrame({
    'bhv_RDM': v1_bhvRDM,
    'fmri_RDMs': v2_fmriRDM,
    'Covariation': v3_bhvsentenceLRDM
})
# NaN --> 0
dataxin = data.fillna(0)
corrs = pg.partial_corr(dataxin, x='bhv_RDM', y='fmri_RDMs',
                        covar='Covariation', method='spearman')
corrs_result[i, j, k] = corrs[['r', 'p-val']]
# Do the Fisher z-transform of r
Zcorrs_result[i, j, k] = corrs[['r', 'p-val']]
Zcorrs_result[i, j, k, 0] = 0.5 * np.log(
    (1 + corrs['r'].iloc[0]) / (1 - corrs['r'].iloc[0]))

"""*** save RSA result ***"""
# get the affine info
affine = get_affine(mask_filename)
# save the RSA result as a .nii file
RSAresultfilename = ('%s/partialSpearmanRSAimg_%s.nii' %
                     (RSAresultpath, subID[sub][4:10]))
# If img_background=None, the background will be ch2.nii.gz.
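# Side note (not in the original script): the Fisher z-transform written out
# above, 0.5 * log((1 + r) / (1 - r)), is exactly np.arctanh(r):
import numpy as np
r_demo = 0.73
print(0.5 * np.log((1 + r_demo) / (1 - r_demo)), np.arctanh(r_demo))  # equal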
"list_index", "colorcode", "colorcode5levels") for df in df_list_beforegb ] # correaltion paramters method = "pearson" x = "a_values" y = "deviation_score" covar = "N_disk" # corr: a values and numerosity corr_av_ndisc = list() corr_av_ndisc = [ stats.pearsonr(sub_df[x], sub_df[covar]) for sub_df in df_list ] if parrtial_corr: partial_corr_res_list = [ pg.partial_corr(df, x=x, y=y, covar=covar, method=method) for df in df_list ] else: corr_res_list = [ stats.pearsonr(sub_df[x], sub_df[y]) for sub_df in df_list ] # %% normalization df_list_norm_deviation = [normalize_deviation(df) for df in df_list] df_list_norm_avs = [ normalize_minusonetozero(df, to_normalize_col="a_values") for df in df_list ] # rename normed cols old_name_dev = "deviation_score" new_name_dev = "deviation_score_norm"
def get_partial_corr_df(indx_align_n=0, w03=winsize03, w04=winsize04,
                        w05=winsize05, w06=winsize06, w07=winsize07):
    """Get one partial-correlation dataframe for a given angle size,
    indicated by indx_align_n."""
    w03 = get_data_to_analysis(w03, "deviation_score", alignment[indx_align_n],
                               "N_disk", "list_index", "colorcode",
                               "colorcode5levels")
    w04 = get_data_to_analysis(w04, "deviation_score", alignment[indx_align_n],
                               "N_disk", "list_index", "colorcode",
                               "colorcode5levels")
    w05 = get_data_to_analysis(w05, "deviation_score", alignment[indx_align_n],
                               "N_disk", "list_index", "colorcode",
                               "colorcode5levels")
    w06 = get_data_to_analysis(w06, "deviation_score", alignment[indx_align_n],
                               "N_disk", "list_index", "colorcode",
                               "colorcode5levels")
    w07 = get_data_to_analysis(w07, "deviation_score", alignment[indx_align_n],
                               "N_disk", "list_index", "colorcode",
                               "colorcode5levels")

    method = "pearson"
    try:
        partial_corr_03 = pg.partial_corr(w03, x="deviation_score",
                                          y=alignment[indx_align_n],
                                          covar="N_disk", method=method)
    except Exception:
        partial_corr_03 = pd.DataFrame()
    try:
        partial_corr_04 = pg.partial_corr(w04, x="deviation_score",
                                          y=alignment[indx_align_n],
                                          covar="N_disk", method=method)
    except Exception:
        partial_corr_04 = pd.DataFrame()
    try:
        partial_corr_05 = pg.partial_corr(w05, x="deviation_score",
                                          y=alignment[indx_align_n],
                                          covar="N_disk", method=method)
    except Exception:
        partial_corr_05 = pd.DataFrame()
    partial_corr_06 = pg.partial_corr(w06, x="deviation_score",
                                      y=alignment[indx_align_n],
                                      covar="N_disk", method=method)
    partial_corr_07 = pg.partial_corr(w07, x="deviation_score",
                                      y=alignment[indx_align_n],
                                      covar="N_disk", method=method)

    # normalization
    w03_norm_deviation = normalize_deviation(w03)
    w04_norm_deviation = normalize_deviation(w04)
    w05_norm_deviation = normalize_deviation(w05)
    w06_norm_deviation = normalize_deviation(w06)
    w07_norm_deviation = normalize_deviation(w07)
    w03_norm_align_v = normalize_zerotoone(w03, to_normalize_col=alignment[indx_align_n])
    w04_norm_align_v = normalize_zerotoone(w04, to_normalize_col=alignment[indx_align_n])
    w05_norm_align_v = normalize_zerotoone(w05, to_normalize_col=alignment[indx_align_n])
    w06_norm_align_v = normalize_zerotoone(w06, to_normalize_col=alignment[indx_align_n])
    w07_norm_align_v = normalize_zerotoone(w07, to_normalize_col=alignment[indx_align_n])

    # rename normalized columns
    old_name_dev = "deviation_score"
    new_name_dev = "deviation_score_norm"
    old_name_alig = alignment[indx_align_n]
    new_name_alig = alignment[indx_align_n] + "_norm"
    w03_norm_deviation = rename_norm_col(w03_norm_deviation, old_name_dev, new_name_dev)
    w04_norm_deviation = rename_norm_col(w04_norm_deviation, old_name_dev, new_name_dev)
    w05_norm_deviation = rename_norm_col(w05_norm_deviation, old_name_dev, new_name_dev)
    w06_norm_deviation = rename_norm_col(w06_norm_deviation, old_name_dev, new_name_dev)
    w07_norm_deviation = rename_norm_col(w07_norm_deviation, old_name_dev, new_name_dev)
    w03_norm_align_v = rename_norm_col(w03_norm_align_v, old_name_alig, new_name_alig)
    w04_norm_align_v = rename_norm_col(w04_norm_align_v, old_name_alig, new_name_alig)
    w05_norm_align_v = rename_norm_col(w05_norm_align_v, old_name_alig, new_name_alig)
    w06_norm_align_v = rename_norm_col(w06_norm_align_v, old_name_alig, new_name_alig)
    w07_norm_align_v = rename_norm_col(w07_norm_align_v, old_name_alig, new_name_alig)

    # concatenate the original dataframes with the normalized columns
    w03 = pd.concat([w03, w03_norm_deviation, w03_norm_align_v], axis=1)
    w04 = pd.concat([w04, w04_norm_deviation, w04_norm_align_v], axis=1)
    w05 = pd.concat([w05, w05_norm_deviation, w05_norm_align_v], axis=1)
    w06 = pd.concat([w06, w06_norm_deviation, w06_norm_align_v], axis=1)
    w07 = pd.concat([w07, w07_norm_deviation, w07_norm_align_v], axis=1)

    # pooled data to calculate the overall partial corr
    my_data_new = pd.concat([w03, w04, w05, w06, w07], axis=0, sort=True)
    partial_corr_all = pg.partial_corr(my_data_new, x="deviation_score",
                                       y=alignment[indx_align_n],
                                       covar="N_disk", method=method)
    partial_corr = pd.concat([partial_corr_03, partial_corr_04,
                              partial_corr_05, partial_corr_06,
                              partial_corr_07, partial_corr_all], axis=0)
    return partial_corr
import pickle

import numpy as np
import pandas as pd
from scipy.io import loadmat


def SFC_regress_out_distance(structure_file_path, function_file_path,
                             electrode_localization_by_atlas_file_path,
                             outputfile):
    import pingouin as pg

    # Get functional connectivity data in pickle file format
    with open(function_file_path, 'rb') as f:
        (broadband, alphatheta, beta, lowgamma, highgamma,
         electrode_row_and_column_names,
         order_of_matrices_in_pickle_file) = pickle.load(f)
    FC_list = [broadband, alphatheta, beta, lowgamma, highgamma]

    # Set up the dataframe of electrodes to analyze
    final_electrodes = pd.DataFrame(electrode_row_and_column_names,
                                    columns=['electrode_name'])
    final_electrodes = final_electrodes.reset_index()
    final_electrodes = final_electrodes.rename(columns={"index": "func_index"})

    # Get structural connectivity data in mat file format (output from DSI Studio)
    structural_connectivity_array = np.array(
        pd.DataFrame(loadmat(structure_file_path)['connectivity']))

    # Get electrode localization by atlas (csv from get_electrode_localization.py)
    electrode_localization_by_atlas = pd.read_csv(
        electrode_localization_by_atlas_file_path)

    # Normalize and log-scale the structural matrices; convert 0s to 1 to
    # avoid taking the log of zero
    structural_connectivity_array[structural_connectivity_array == 0] = 1
    structural_connectivity_array = np.log10(structural_connectivity_array)
    structural_connectivity_array = (structural_connectivity_array /
                                     np.max(structural_connectivity_array))

    # Only consider electrodes that are in both the localization and pickle files
    final_electrodes = final_electrodes.merge(electrode_localization_by_atlas,
                                              on='electrode_name')
    # Remove electrodes in the functional connectivity matrices whose region is 0
    final_electrodes = final_electrodes[final_electrodes['region_number'] != 0]
    for i in range(len(FC_list)):
        FC_list[i] = FC_list[i][final_electrodes['func_index'], :, :]
        FC_list[i] = FC_list[i][:, final_electrodes['func_index'], :]

    # Fisher z-transform (just arctanh) of the functional connectivity data,
    # so we can take means of correlations and correlate them with the
    # structural connectivity
    for i in range(len(FC_list)):
        FC_list[i] = np.arctanh(FC_list[i])

    # Remove structural ROIs not among the electrode-localization ROIs
    electrode_ROIs = np.unique(np.array(final_electrodes.iloc[:, 5]))
    electrode_ROIs = electrode_ROIs[~(electrode_ROIs == 0)]  # remove region 0
    structural_index = electrode_ROIs - 1  # subtract 1 for zero indexing
    structural_connectivity_array = structural_connectivity_array[structural_index, :]
    structural_connectivity_array = structural_connectivity_array[:, structural_index]

    # Take the average functional connectivity for electrodes in the same
    # atlas region; first produce the distance matrix to regress out
    dist_matrix = np.zeros((final_electrodes.shape[0], final_electrodes.shape[0]))
    for i in range(0, final_electrodes.shape[0]):
        for j in range(0, final_electrodes.shape[0]):
            if i != j:
                c_i = final_electrodes.iloc[i, 2:5]
                c_j = final_electrodes.iloc[j, 2:5]
                dist = np.sqrt((c_i[0] - c_j[0]) ** 2 +
                               (c_i[1] - c_j[1]) ** 2 +
                               (c_i[2] - c_j[2]) ** 2)
                dist_matrix[i, j] = dist
                dist_matrix[j, i] = dist

    for i in range(len(FC_list)):
        ROIs = np.array(final_electrodes.iloc[:, 5])
        for r in range(len(electrode_ROIs)):
            index_logical = (ROIs == electrode_ROIs[r])
            index_first = np.where(index_logical)[0][0]
            index_second_to_end = np.where(index_logical)[0][1:]
            mean = np.mean(FC_list[i][index_logical, :, :], axis=0)
            # Average the distances within the region as well; distance only
            # needs to be modified once
            if i == 0:
                mean_dist = np.mean(dist_matrix[index_logical, :], axis=0)
            # Fill in with the mean
            FC_list[i][index_first, :, :] = mean
            FC_list[i][:, index_first, :] = mean
            # Fill in with the mean distance
            if i == 0:
                dist_matrix[index_first, :] = mean_dist
                dist_matrix[:, index_first] = mean_dist
            # Delete the other rows and columns belonging to the same region
            FC_list[i] = np.delete(FC_list[i], index_second_to_end, axis=0)
            FC_list[i] = np.delete(FC_list[i], index_second_to_end, axis=1)
            # Delete the other rows and columns in the distance matrix
            if i == 0:
                dist_matrix = np.delete(dist_matrix, index_second_to_end, axis=0)
                dist_matrix = np.delete(dist_matrix, index_second_to_end, axis=1)
            # Keep track of which electrode labels correspond to which
            # rows and columns
            ROIs = np.delete(ROIs, index_second_to_end, axis=0)
        # Remove electrodes in the ROI labeled as zero
        index_logical = (ROIs == 0)
        index = np.where(index_logical)[0]
        FC_list[i] = np.delete(FC_list[i], index, axis=0)
        FC_list[i] = np.delete(FC_list[i], index, axis=1)
        ROIs = np.delete(ROIs, index, axis=0)
        # Remove electrodes in the ROI labeled as zero from the distance matrix
        dist_matrix = np.delete(dist_matrix, index, axis=0)
        dist_matrix = np.delete(dist_matrix, index, axis=1)

    # Order FC matrices by ROIs
    order = np.argsort(ROIs)
    for i in range(len(FC_list)):
        FC_list[i] = FC_list[i][order, :, :]
        FC_list[i] = FC_list[i][:, order, :]
    # Order the distance matrix by ROIs
    dist_matrix = dist_matrix[order, :]
    dist_matrix = dist_matrix[:, order]

    # Un-Fisher-z-transform
    for i in range(len(FC_list)):
        FC_list[i] = np.tanh(FC_list[i])

    # Initialize correlation arrays
    correlation_list = [None] * len(FC_list)
    for i in range(len(FC_list)):
        correlation_list[i] = np.zeros([FC_list[0].shape[2]], dtype=float)

    correlation_type = 'spearman'
    # Calculate the structure-function correlation
    for i in range(len(FC_list)):
        for t in range(FC_list[i].shape[2] - 1):
            to_corr_df = pd.DataFrame({
                'func': np.ndarray.flatten(FC_list[i][:, :, t]),
                'dist': np.ndarray.flatten(dist_matrix),
                'struc': np.ndarray.flatten(structural_connectivity_array)
            })
            # Spearman rank correlation: functional and structural
            # connectivity are non-normally distributed, so Spearman is the
            # appropriate choice
            if correlation_type == 'spearman':
                correlation_list[i][t] = pg.partial_corr(
                    to_corr_df, x='func', y='struc', covar='dist',
                    method='spearman').iloc[0, 1]
            # Pearson correlation: computed because past studies used Pearson
            # and we want to see whether the results are comparable
            if correlation_type == 'pearson':
                correlation_list[i][t] = pg.partial_corr(
                    to_corr_df, x='func', y='struc', covar='dist',
                    method='pearson').iloc[0, 1]

    order_of_matrices_in_pickle_file = pd.DataFrame(
        ["broadband", "alphatheta", "beta", "lowgamma", "highgamma"],
        columns=["Order of matrices in pickle file"])
    with open(outputfile, 'wb') as f:
        pickle.dump([correlation_list[0], correlation_list[1],
                     correlation_list[2], correlation_list[3],
                     correlation_list[4],
                     order_of_matrices_in_pickle_file], f)
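# Hedged usage sketch for the function above; the file names below are
# hypothetical placeholders, not paths from the original project.
if __name__ == '__main__':
    SFC_regress_out_distance(
        structure_file_path='sub01_connectivity.mat',  # placeholder
        function_file_path='sub01_functional_connectivity.pickle',  # placeholder
        electrode_localization_by_atlas_file_path='sub01_localization.csv',  # placeholder
        outputfile='sub01_SFC_distance_regressed.pickle')  # placeholder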