Example #1
    def edgewise_pcorr(self, train_subs, fold, perm, method='pearson'):
        corr_dfs = []  # Appending to a list and then creating a DataFrame is substantially faster than appending to a DataFrame
        empty_df = pd.DataFrame({'r': {'pearson': np.nan},
                                 'p-val': {'pearson': np.nan}})  # Handle all-zero edges
        train_data = self.data['data'].loc[train_subs]
        train_data.columns = train_data.columns.astype(str)

        N = len(self.data['edges'])
        n = 1
        percent = round((n / N) * 100)

        for edge in self.data['edges']:  # Edge-wise correlation
            if (train_data[edge] != 0).any():  # All-zero columns would raise a ValueError; checking is _way_ faster than try/except
                if perm >= 0:
                    y = "{}-perm-{}".format(self.data['behav'], perm)
                else:
                    y = self.data['behav']
                if self.data['covars']:
                    pcorr = partial_corr(data=train_data,
                                         x=edge,
                                         y=y,
                                         covar=self.data['covars'],
                                         method=method)[['r', 'p-val']]  # Taking only the necessary columns speeds this up a few %
                    pcorr['covars'] = True  # Debug, remove later
                else:  # We could also use Pingouin's pcorr on the entire df, but that is prohibitively memory-intensive; edge-wise like this works just fine. This branch was introduced to sanity-check implausibly good results from Pingouin's partial_corr above: with no covariates we can fall back on SciPy's implementation of Pearson's r
                    pcorr = empty_df.copy()
                    pcorr[['r', 'p-val']] = sp.stats.pearsonr(
                        train_data.loc[:, edge], train_data.loc[:, y]
                    )  # We are basically reproducing Pingouin's output format here for unified downstream processing
                    pcorr['covars'] = False  # Debug, remove later
            else:
                pcorr = empty_df
            corr_dfs.append(pcorr)
            percent_new = round((n / N) * 100)
            if perm >= 0:
                fold_msg = "{} of permutation {}".format(fold + 1, perm + 1)
            else:
                fold_msg = fold + 1
            if percent_new > percent:
                self.status_update("Computing fold {} ({} %)...".format(
                    fold_msg, percent_new))
                percent = percent_new
            n += 1
        self.status_update("Assembling data frame...")
        combined_corr_dfs = pd.concat(corr_dfs)  # Assembling the df before .put() avoids awfully slow pickling of data through the queue (it is orders of magnitude faster that way)
        return combined_corr_dfs
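
A minimal sketch of the performance claim in the comment above (collect rows in a list and concatenate once, versus growing a DataFrame row by row); the sizes and column names are arbitrary illustrations, not part of the original class:

import time

import numpy as np
import pandas as pd

rows = [pd.DataFrame({'r': [np.random.rand()], 'p-val': [np.random.rand()]})
        for _ in range(2000)]

t0 = time.perf_counter()
combined = pd.concat(rows)  # one allocation at the end
t_concat = time.perf_counter() - t0

t0 = time.perf_counter()
grown = rows[0]
for row in rows[1:]:
    grown = pd.concat([grown, row])  # re-copies the accumulated frame every iteration
t_grow = time.perf_counter() - t0

print(f"concat once: {t_concat:.3f}s, grow per row: {t_grow:.3f}s")

Growing row by row is quadratic in the number of rows, which is why edgewise_pcorr collects its per-edge results in corr_dfs and concatenates once at the end.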
Example #2
    def calculate_partial_correlation(self):
        partial_correlations_list = []
        data = pd.DataFrame(self.shap_values, columns=self.sensor_names)
        data["RUL"] = testing_RULs

        for sensor_name1 in self.sensor_names:
            # all remaining sensors act as covariates removed from RUL (semi-partial correlation)
            covariates = [s for s in self.sensor_names if s != sensor_name1]
            res = math.fabs(
                partial_corr(data=data, x=sensor_name1, y='RUL',
                             y_covar=covariates, method='pearson')['r'].iloc[0])

            partial_correlations_list.append(res)
        print("Partial correl", partial_correlations_list)
        self.correlate_indicators(partial_correlations_list)
Example #3
def calculateBrainBehaviorCorrelations(subjects, TRs, brainRegions):
    global ling1D, analog1D, presentedImages

    # Create multi-index
    subjectsInd = np.repeat(subjects, len(TRs))
    TRsInd = np.tile(TRs, len(subjects))
    behavioralInd = np.tile(['analog', 'ling', 'partial'],
                            len(TRs) * len(subjects))
    arrays = [subjectsInd, TRsInd, behavioralInd]
    tuples = list(zip(*arrays))
    index = pd.MultiIndex.from_tuples(
        tuples, names=["subjects", "TRs", "behavioralMeasure"])

    brainCorr = pd.DataFrame(columns=brainRegions, index=index)

    for subject in subjects:
        print(subject)
        for TR in TRs:
            print(TR)
            subjMat, stimList = loadSubjectBrainData(subject, TR)
            for region in brainRegions:
                regionMat = pd.DataFrame(subjMat[region].T,
                                         columns=stimList.iloc[:, 0])
                regionMat = regionMat[regionMat.columns.intersection(
                    presentedImages.iloc[:, 0])]
                regionCorr = regionMat.corr()
                regionCorr.sort_index(inplace=True, axis=0)
                regionCorr.sort_index(inplace=True, axis=1)
                regionCorrNP = regionCorr.to_numpy()
                regionCorr1D = np.reshape(regionCorrNP, regionCorrNP.size)  # flatten to 1-D

                # dfPartial is assumed to be a module-level DataFrame holding the
                # 'analog' and 'ling' behavioral vectors
                dfPartial['brain'] = regionCorr1D

                brainCorr.at[(subject, TR, 'analog'),
                             region] = np.corrcoef(analog1D, regionCorr1D)[0, 1]

                brainCorr.at[(subject, TR, 'ling'),
                             region] = np.corrcoef(ling1D, regionCorr1D)[0, 1]

                brainCorr.at[(subject, TR, 'partial'),
                             region] = pg.partial_corr(data=dfPartial,
                                                       x='analog',
                                                       y='brain',
                                                       covar='ling').round(3).r.pearson
    return brainCorr
Example #4
    def correlate(self,
                  x_list=None,
                  y_list=None,
                  c_list=None,
                  method="spearman",
                  figfmt="png",
                  skip=False):
        """Correlate variables.
        """
        if skip:
            return self

        x_list = self.phtp_list if x_list is None else x_list
        y_list = self.phtp_list if y_list is None else y_list
        c_list = self.cvrt_list if c_list is None else c_list

        x_len, y_len = len(x_list), len(y_list)
        pcorr_mtrx_np = np.eye(x_len, y_len)
        for idx_x, pntp_x in enumerate(x_list):
            for idx_y, pntp_y in enumerate(y_list):
                if pntp_y != pntp_x:
                    try:
                        _pcorr_dtfm = pg.partial_corr(self.dataframe,
                                                      pntp_x,
                                                      pntp_y,
                                                      c_list,
                                                      method=method)
                        pcorr_mtrx_np[idx_x, idx_y] = _pcorr_dtfm['r'].iloc[0]
                    except AssertionError:
                        pass

        self.pcor_dtfm = pd.DataFrame(pcorr_mtrx_np,
                                      index=x_list,
                                      columns=y_list)
        pcorr_htmp_name = ".".join(
            [self.output_prefix, "correlation_heatmap", figfmt])
        ctmp_grid = sb.clustermap(self.pcor_dtfm,
                                  col_cluster=True,
                                  row_cluster=True,
                                  cmap="Greens")
        ctmp_grid.fig.savefig(pcorr_htmp_name)

        plt.cla()
        plt.clf()
        plt.close()

        return self
Example #5
def partial_corr_brainxdcnn(conv_out=0):

    layers_interest = list(range(1, nb_layers))
    layers_interest = np.delete(layers_interest, [conv_out])

    partialKendall_timec_df = pd.DataFrame()
    partial_pval_df = pd.DataFrame()
    partial_corr_timecourse = dict()
    # correlate brain and dcnn rdms
    for lay in layers_interest:

        layer_name = model.layers[lay].name  # e.g. block1_conv1

        print(f"{layer_name}")
        # save layer's RDM to correlate with human brain RDMs
        corr_rdms_df = pd.read_pickle(
            op.join(output_layer_rdms_dir, f'rdm_{layer_name}.pkl'))
        corr_rdms_array = normalise_dist(
            scipy.spatial.distance.squareform(1 - (np.asarray(corr_rdms_df))))

        partialK_timecourse = []
        partialK_pvals_timecourse = []
        partial_corr_timecourse[layer_name] = {}
        for this_slice in range(len(times)):
            coeffs_temp = pg.partial_corr(data=big_df,
                                          x=f"{layer_name}",
                                          y=f"{times[this_slice].round(3)}",
                                          covar=[layers_names[conv_out]],
                                          method='kendall')
            partialK_timecourse.append(coeffs_temp.r.kendall)
            partialK_pvals_timecourse.append(coeffs_temp['p-val'].kendall)

        temp_df = pd.DataFrame({f'{layer_name}': partialK_timecourse})
        temp_val_df = pd.DataFrame(
            {f'{layer_name}': partialK_pvals_timecourse})
        partialKendall_timec_df = pd.concat([partialKendall_timec_df, temp_df],
                                            axis=1)
        partial_pval_df = pd.concat([partial_pval_df, temp_val_df], axis=1)
    return partialKendall_timec_df, partial_pval_df
Example #6
def partial_corr(C, method='pearson'):
    p = C.shape[1]
    index = C.columns.tolist()
    P_corr = np.zeros((p, p), dtype=float)  # the np.float alias was removed in NumPy 1.24
    P_pval = np.zeros((p, p), dtype=float)
    for i in range(p):
        P_corr[i, i] = 1
        P_pval[i, i] = 1
        for j in range(i + 1, p):
            x, y = index[i], index[j]
            res = pg.partial_corr(data=C,
                                  x=x,
                                  y=y,
                                  covar=list(set(index) - set([x, y])),
                                  method=method)
            corr, pval = res['r'].iloc[0], res['p-val'].iloc[0]
            P_corr[i, j] = corr
            P_corr[j, i] = corr
            P_pval[i, j] = pval
            P_pval[j, i] = pval
    P_corr = pd.DataFrame(P_corr, index=index, columns=index)
    P_pval = pd.DataFrame(P_pval, index=index, columns=index)
    return (P_corr, P_pval)
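
A usage sketch for the pairwise builder above, on arbitrary random data: since each pair is conditioned on all remaining columns, the resulting 'r' matrix should agree with Pingouin's DataFrame .pcorr() accessor (the same accessor the test examples below compare against the ppcor package):

import numpy as np
import pandas as pd
import pingouin as pg  # importing pingouin registers the .pcorr() DataFrame accessor

rng = np.random.default_rng(42)
C = pd.DataFrame(rng.normal(size=(100, 4)), columns=list('ABCD'))

P_corr, P_pval = partial_corr(C)  # the function defined above
print(P_corr.round(3))
print(C.pcorr().round(3))  # should match P_corr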
Example #7
    def test_pandas(self):
        """Test pandas method.
        """
        # Test the ANOVA (Pandas)
        aov = df.anova(dv='Scores', between='Group', detailed=True)
        assert aov.equals(
            pg.anova(dv='Scores', between='Group', detailed=True, data=df))

        # Test the Welch ANOVA (Pandas)
        aov = df.welch_anova(dv='Scores', between='Group')
        assert aov.equals(pg.welch_anova(dv='Scores', between='Group',
                                         data=df))

        # Test the repeated measures ANOVA (Pandas)
        aov = df.rm_anova(dv='Scores',
                          within='Time',
                          subject='Subject',
                          detailed=True)
        assert aov.equals(
            pg.rm_anova(dv='Scores',
                        within='Time',
                        subject='Subject',
                        detailed=True,
                        data=df))

        # FDR-corrected post hocs with Hedges' g effect size
        ttests = df.pairwise_ttests(dv='Scores',
                                    within='Time',
                                    subject='Subject',
                                    padjust='fdr_bh',
                                    effsize='hedges')
        assert ttests.equals(
            pg.pairwise_ttests(dv='Scores',
                               within='Time',
                               subject='Subject',
                               padjust='fdr_bh',
                               effsize='hedges',
                               data=df))

        # Test two-way mixed ANOVA
        aov = df.mixed_anova(dv='Scores',
                             between='Group',
                             within='Time',
                             subject='Subject',
                             correction=False)
        assert aov.equals(
            pg.mixed_anova(dv='Scores',
                           between='Group',
                           within='Time',
                           subject='Subject',
                           correction=False,
                           data=df))

        # Test pairwise correlations
        corrs = data.pairwise_corr(columns=['X', 'M', 'Y'], method='spearman')
        corrs2 = pg.pairwise_corr(data=data,
                                  columns=['X', 'M', 'Y'],
                                  method='spearman')
        assert corrs['r'].equals(corrs2['r'])

        # Test partial correlation
        corrs = data.partial_corr(x='X', y='Y', covar='M', method='spearman')
        corrs2 = pg.partial_corr(x='X',
                                 y='Y',
                                 covar='M',
                                 method='spearman',
                                 data=data)
        assert corrs['r'].equals(corrs2['r'])

        # Test partial correlation matrix (compare with the ppcor package)
        corrs = data.pcorr().round(3)
        np.testing.assert_array_equal(corrs.iloc[0, :].values,
                                      [1, 0.392, 0.06, -0.014, -0.149])
        # Now compare against Pingouin's own partial_corr function
        corrs = data[['X', 'Y', 'M']].pcorr()
        corrs2 = data.partial_corr(x='X', y='Y', covar='M')
        assert round(corrs.loc['X', 'Y'], 3) == corrs2.loc['pearson', 'r']

        # Test mediation analysis
        med = data.mediation_analysis(x='X', m='M', y='Y', seed=42, n_boot=500)
        np.testing.assert_array_equal(med.loc[:, 'coef'].values,
                                      [0.5610, 0.6542, 0.3961, 0.0396, 0.3565])
Example #8
ax.set_title(label='DepMap Screening in CCLE cell lines',
             fontdict={
                 'fontweight': 'bold',
                 'fontsize': 14
             })
# plt.show()
plt.tight_layout()
fig.savefig('_distribution.pdf')
plt.close()

# calculate the partial correlation between the given gene and every other gene, adjusted for tissue type
corr_res = {}
for g in depmap_screening.columns.tolist():
    if g == gene_name:
        continue
    df = pd.concat(
        [depmap_screening[[gene_name, g]].astype('float'),
         tmp.astype('int')],
        axis=1).dropna()
    try:
        corr_res[g] = pg.partial_corr(
            df.astype('float'), x=g, y=gene_name,
            covar=tmp.columns.tolist()).loc['pearson', ['r', 'p-val']]
    except Exception:  # skip genes where partial_corr fails (e.g. too few valid rows after dropna)
        continue

corr_res = pd.DataFrame.from_dict(corr_res, orient='index')
corr_res.columns = ['pcorr', 'ppval']
corr_res = corr_res.sort_values('pcorr', ascending=False)
corr_res.to_csv('DepMap_screen_partial_corr_adjByTissueType.csv')

Example #9
# Partial Correlation


data = pd.read_csv('/home/atrides/Desktop/R/statistics_with_Python/06_Correlation/Data_Files/Exam Anxiety.dat', sep='\t')

data = data[['Revise', 'Exam', 'Anxiety']]
print(data.head())

import pingouin as pg

print(data.pcorr())


# Using pingouin
print(pg.partial_corr(data = data, x='Exam', y='Anxiety', covar='Revise'))


# Semi-Partial Correlation

print(pg.partial_corr(data=data, x='Exam' , y='Anxiety', x_covar='Revise'))


# partial correlation - quantifies the relationship between two variables while controlling for the effects of a third variable on both variables in the original correlation.
#
# semi-partial correlation - quantifies the relationship between two variables while controlling for the effects of a third variable on only one of the variables in the original correlation.
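
A minimal sketch of that distinction on synthetic data mimicking the Exam Anxiety columns (the real .dat file is not needed): the partial correlation equals the Pearson correlation of the residuals of both variables after regressing out the covariate, while the semi-partial residualizes only x.

import numpy as np
import pandas as pd
import pingouin as pg

rng = np.random.default_rng(0)
revise = rng.normal(size=200)
df_demo = pd.DataFrame({
    'Revise': revise,
    'Exam': 0.6 * revise + rng.normal(size=200),
    'Anxiety': -0.5 * revise + rng.normal(size=200),
})

def residualize(y, x):
    # residuals of y after a simple linear regression on x
    slope, intercept = np.polyfit(x, y, 1)
    return y - (slope * x + intercept)

r_exam = residualize(df_demo['Exam'], df_demo['Revise'])
r_anx = residualize(df_demo['Anxiety'], df_demo['Revise'])

# partial: both variables residualized against the covariate
print(np.corrcoef(r_exam, r_anx)[0, 1])
print(pg.partial_corr(data=df_demo, x='Exam', y='Anxiety', covar='Revise').at['pearson', 'r'])

# semi-partial: only x residualized
print(np.corrcoef(r_exam, df_demo['Anxiety'])[0, 1])
print(pg.partial_corr(data=df_demo, x='Exam', y='Anxiety', x_covar='Revise').at['pearson', 'r'])

Each pair of printed values should agree up to floating-point error.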



Example #10
import pandas as pd
import pingouin as pg

if __name__ == '__main__':
    PATH = "../../data/ms1_encircle/"
    DATA = "ms1_encircle_corr.xlsx"
    data = pd.read_excel(PATH + DATA)

    # par correlation
    x = "deviation_score_mean"
    y = "groups_mean"
    covar = "numerosity"
    method = "pearson"

    # TODO
    # alignmentcon = "radial"
    alignmentcon = "tangential"
    winsize = 0.7

    data_to_analysis = data[(data["winsize"] == winsize) & (data["crowdingcons"] == alignmentcon)]
    # data_to_analysis = data[(data["crowdingcons"] == alignmentcon)]

    partial_corr = pg.partial_corr(data_to_analysis,
                                   x = x,
                                   y = y,
                                   covar = covar,
                                   method = method)

Example #11
    def test_pandas(self):
        """Test pandas method.
        """
        # Test the ANOVA (Pandas)
        aov = df.anova(dv='Scores', between='Group', detailed=True)
        assert aov.equals(
            pg.anova(dv='Scores', between='Group', detailed=True, data=df))
        aov3_ss1 = df_aov3.anova(dv='Cholesterol',
                                 between=['Sex', 'Drug'],
                                 ss_type=1)
        aov3_ss2 = df_aov3.anova(dv='Cholesterol',
                                 between=['Sex', 'Drug'],
                                 ss_type=2)
        aov3_ss2_pg = pg.anova(dv='Cholesterol',
                               between=['Sex', 'Drug'],
                               data=df_aov3,
                               ss_type=2)
        assert not aov3_ss1.equals(aov3_ss2)
        assert aov3_ss2.round(3).equals(aov3_ss2_pg.round(3))

        # Test the Welch ANOVA (Pandas)
        aov = df.welch_anova(dv='Scores', between='Group')
        assert aov.equals(pg.welch_anova(dv='Scores', between='Group',
                                         data=df))

        # Test the ANCOVA
        aov = df_anc.ancova(dv='Scores', covar='Income',
                            between='Method').round(3)
        assert (aov.equals(
            pg.ancova(data=df_anc,
                      dv='Scores',
                      covar='Income',
                      between='Method').round(3)))

        # Test the repeated measures ANOVA (Pandas)
        aov = df.rm_anova(dv='Scores',
                          within='Time',
                          subject='Subject',
                          detailed=True)
        assert (aov.equals(
            pg.rm_anova(dv='Scores',
                        within='Time',
                        subject='Subject',
                        detailed=True,
                        data=df)))

        # FDR-corrected post hocs with Hedges' g effect size
        ttests = df.pairwise_tests(dv='Scores',
                                   within='Time',
                                   subject='Subject',
                                   padjust='fdr_bh',
                                   effsize='hedges')
        assert (ttests.equals(
            pg.pairwise_tests(dv='Scores',
                              within='Time',
                              subject='Subject',
                              padjust='fdr_bh',
                              effsize='hedges',
                              data=df)))

        # Pairwise Tukey
        tukey = df.pairwise_tukey(dv='Scores', between='Group')
        assert tukey.equals(
            pg.pairwise_tukey(data=df, dv='Scores', between='Group'))

        # Test two-way mixed ANOVA
        aov = df.mixed_anova(dv='Scores',
                             between='Group',
                             within='Time',
                             subject='Subject',
                             correction=False)
        assert (aov.equals(
            pg.mixed_anova(dv='Scores',
                           between='Group',
                           within='Time',
                           subject='Subject',
                           correction=False,
                           data=df)))

        # Test pairwise correlations
        corrs = data.pairwise_corr(columns=['X', 'M', 'Y'], method='spearman')
        corrs2 = pg.pairwise_corr(data=data,
                                  columns=['X', 'M', 'Y'],
                                  method='spearman')
        assert corrs['r'].equals(corrs2['r'])

        # Test partial correlation
        corrs = data.partial_corr(x='X', y='Y', covar='M', method='spearman')
        corrs2 = pg.partial_corr(x='X',
                                 y='Y',
                                 covar='M',
                                 method='spearman',
                                 data=data)
        assert corrs['r'].equals(corrs2['r'])

        # Test partial correlation matrix (compare with the ppcor package)
        corrs = data.iloc[:, :5].pcorr().round(3)
        np.testing.assert_array_equal(corrs.iloc[0, :].to_numpy(),
                                      [1, 0.392, 0.06, -0.014, -0.149])
        # Now compare against Pingouin's own partial_corr function
        corrs = data[['X', 'Y', 'M']].pcorr()
        corrs2 = data.partial_corr(x='X', y='Y', covar='M')
        assert np.isclose(corrs.at['X', 'Y'], corrs2.at['pearson', 'r'])

        # Test rcorr (correlation matrix with p-values)
        # We compare against Pingouin pairwise_corr function
        corrs = df_corr.rcorr(padjust='holm', decimals=4)
        corrs2 = df_corr.pairwise_corr(padjust='holm').round(4)
        assert corrs.at['Neuroticism', 'Agreeableness'] == '*'
        assert (corrs.at['Agreeableness',
                         'Neuroticism'] == str(corrs2.at[2, 'r']))
        corrs = df_corr.rcorr(padjust='holm', stars=False, decimals=4)
        assert (corrs.at['Neuroticism',
                         'Agreeableness'] == str(corrs2.at[2,
                                                           'p-corr'].round(4)))
        corrs = df_corr.rcorr(upper='n', decimals=5)
        corrs2 = df_corr.pairwise_corr().round(5)
        assert corrs.at['Extraversion', 'Openness'] == corrs2.at[4, 'n']
        assert corrs.at['Openness', 'Extraversion'] == str(corrs2.at[4, 'r'])
        # Method = spearman does not work with Python 3.5 on Travis?
        # Instead it seems to return the Pearson correlation!
        df_corr.rcorr(method='spearman')
        df_corr.rcorr()

        # Test mediation analysis
        med = data.mediation_analysis(x='X', m='M', y='Y', seed=42, n_boot=500)
        np.testing.assert_array_equal(med.loc[:, 'coef'].round(4).to_numpy(),
                                      [0.5610, 0.6542, 0.3961, 0.0396, 0.3565])
Example #12
                             tisDatSjTpm1['intronNum_lgstTpt'] > 0,
                             tisName + '_br'] / tisDatSjTpm1.loc[
                                 tisDatSjTpm1['intronNum_lgstTpt'] > 0,
                                 'intronNum_lgstTpt']
        tisDatSjTpm1.loc[
            tisDatSjTpm1[tisName + '_lsjN'] > 0,
            tisName + '_brPerObIn'] = tisDatSjTpm1.loc[
                tisDatSjTpm1[tisName + '_lsjN'] > 0, tisName +
                '_br'] / tisDatSjTpm1.loc[tisDatSjTpm1[tisName + '_lsjN'] > 0,
                                          tisName + '_lsjN']

        result1 = [len(tisDatSjTpm1),sum(tisDatSjTpm1[tisName+'_totSj']),sum(tisDatSjTpm1[tisName+'_totLsj']),sum(tisDatSjTpm1[tisName+'_totBsj']),sum(tisDatSjTpm1[tisName+'_totBsj'])/sum(tisDatSjTpm1[tisName+'_totSj'])]+\
                         list(spearmanr(tisDatSjTpm1[tisName+'_totSj'],tisDatSjTpm1[tisName+'_br']))+\
                         list(spearmanr(tisDatSjTpm1.loc[tisDatSjTpm1['intronNum_lgstTpt'] > 0,tisName+'_totSj'],tisDatSjTpm1.loc[tisDatSjTpm1['intronNum_lgstTpt'] > 0,tisName+'_brPerAnnIn']))+\
                         list(spearmanr(tisDatSjTpm1.loc[tisDatSjTpm1[tisName+'_lsjN']>0,tisName+'_totSj'],tisDatSjTpm1.loc[tisDatSjTpm1[tisName+'_lsjN']>0,tisName+'_brPerObIn']))+\
                         list(partial_corr(data=tisDatSjTpm1,x=tisName+'_totSj',y=tisName+'_br',covar='intronNum_lgstTpt',method='spearman').iloc[0,[1,5]])+\
                         list(partial_corr(data=tisDatSjTpm1,x=tisName+'_totSj',y=tisName+'_br',covar=tisName+'_lsjN',method='spearman').iloc[0,[1,5]])+\
                         list(spearmanr(tisDatSjTpm1[tisName+'_allTpm'],tisDatSjTpm1[tisName+'_br']))+\
                         list(spearmanr(tisDatSjTpm1.loc[tisDatSjTpm1['intronNum_lgstTpt'] > 0,tisName+'_allTpm'],tisDatSjTpm1.loc[tisDatSjTpm1['intronNum_lgstTpt'] > 0,tisName+'_brPerAnnIn']))+\
                         list(spearmanr(tisDatSjTpm1.loc[tisDatSjTpm1[tisName+'_lsjN']>0,tisName+'_allTpm'],tisDatSjTpm1.loc[tisDatSjTpm1[tisName+'_lsjN']>0,tisName+'_brPerObIn']))+\
                         list(partial_corr(data=tisDatSjTpm1,x=tisName+'_allTpm',y=tisName+'_br',covar='intronNum_lgstTpt',method='spearman').iloc[0,[1,5]])+\
                         list(partial_corr(data=tisDatSjTpm1,x=tisName+'_allTpm',y=tisName+'_br',covar=tisName+'_lsjN',method='spearman').iloc[0,[1,5]])+\
                         list(spearmanr(tisDatSjTpm1['intronNum_lgstTpt'],tisDatSjTpm1[tisName+'_br']))+\
                         list(spearmanr(tisDatSjTpm1.loc[tisDatSjTpm1['intronNum_lgstTpt'] > 0,'intronNum_lgstTpt'],tisDatSjTpm1.loc[tisDatSjTpm1['intronNum_lgstTpt'] > 0,tisName+'_brPerAnnIn']))+\
                         list(spearmanr(tisDatSjTpm1[tisName+'_lsjN'],tisDatSjTpm1[tisName+'_br']))+\
                         list(spearmanr(tisDatSjTpm1.loc[tisDatSjTpm1[tisName+'_lsjN']>0,tisName+'_lsjN'],tisDatSjTpm1.loc[tisDatSjTpm1[tisName+'_lsjN']>0,tisName+'_brPerObIn']))

        tisDatSjTpm1.to_csv(sps[i] + '_' + tisName + '_geneWithSjTpm1.xls',
                            sep="\t",
                            index=False,
                            na_rep='NA')
VIF["VIF"] = [
    variance_inflation_factor(independentpd.values, i)
    for i in range(independentpd.shape[1])
]
print(VIF)

independentpd = independentpd.drop(columns=['Year', 'waryn'])
independent = independentpd.to_numpy()
X_train, X_test, Y_train, y_test = train_test_split(independent,
                                                    dependent,
                                                    test_size=0.2,
                                                    random_state=0)

spearman = partial_corr(data=aganalyze,
                        x='warchange',
                        y='Change',
                        x_covar=['warno', 'Area_cat', 'waryn'],
                        y_covar=['Item_cat', 'Area_cat'],
                        method='spearman')
spearman = partial_corr(data=aganalyze,
                        x='warno',
                        y='Change',
                        x_covar=['warchange', 'Area_cat', 'waryn'],
                        y_covar=['Item_cat', 'Area_cat'],
                        method='spearman')

residtestmodel = LinearRegression().fit(X_train, Y_train)
residpredict = residtestmodel.predict(X_test)
residtest = pd.DataFrame()
plotresidpredict = pd.DataFrame(residpredict)
plottest = pd.DataFrame(y_test)
residtest['resid'] = (plottest[0] - plotresidpredict[0])
Example #14
def ExploreIdea2():
    [a, b, cP, Sa, Sb, ScP, IE, SIE,
     data] = CalculateSimulatedEffectSizes(1000, 0.5, 0.5, 0.5, 99)
    df = pd.DataFrame(data, columns=['C', 'A', 'B'])  # a set literal here would give arbitrary column order; order assumed from the original
    print(np.corrcoef(data.T))
    print(pg.partial_corr(data=df, x='A', y='C', covar='B').round(3))
Example #15
# let's convert these data to a pandas frame
df = pd.DataFrame()
df['x1'] = x1
df['x2'] = x2
df['x3'] = x3

# compute the "raw" correlation matrix
cormatR = df.corr()
print(cormatR)

# print out one value
print(' ')
print(cormatR.values[1, 0])

# partial correlation
pc = pg.partial_corr(df, x='x3', y='x2', covar='x1')
print(' ')
print(pc)

## visualize the matrices

fig, ax = plt.subplots(1, 2, figsize=(6, 3))

# raw correlations
ax[0].imshow(cormatR.values, vmin=-1, vmax=1)
ax[0].set_xticks(range(3))
ax[0].set_yticks(range(3))

# add text
Example #16
# add color code for crowding and no-crowding displays
insert_new_col(my_data, "crowdingcons", 'colorcode', add_color_code_by_crowdingcons)
# color coded
insert_new_col_from_two_cols(my_data, "N_disk", "crowdingcons", "colorcode5levels", add_color_code_5levels)
# %% correlations
my_data = get_analysis_dataframe(my_data, crowding = crowdingcons)
# data for each winsize
df_list_beforegb = [get_sub_df_according2col_value(my_data, "winsize", winsize) for winsize in winsize_list]
df_list = [get_data_to_analysis(df, "deviation_score", "count_number", "N_disk", "list_index", "colorcode",
                                "colorcode5levels") for df in df_list_beforegb]
# partial corr parameters
method = "pearson"
x = "count_number"
y = "deviation_score"
covar = "N_disk"
partial_corr_res_list = [pg.partial_corr(df, x = x, y = y, covar = covar, method = method) for df in df_list]
# %% normalization
df_list_norm_deviation = [normalize_deviation(df) for df in df_list]
df_list_norm_countn = [normalize_zerotoone(df, to_normalize_col = "count_number") for df in df_list]
# rename normed cols
old_name_dev = "deviation_score"
new_name_dev = "deviation_score_norm"
old_name_countn = "count_number"
new_name_countn = "count_number_norm"
df_list_norm_deviation = [rename_norm_col(df, old_name_dev, new_name_dev) for df in df_list_norm_deviation]
df_list_norm_countn = [rename_norm_col(df, old_name_countn, new_name_countn) for df in df_list_norm_countn]
# concat orig dataframe with new normalized dataframe
df_list = [pd.concat([df, df_list_norm_deviation[index], df_list_norm_countn[index]], axis = 1) for index, df in
           enumerate(df_list)]
# %% calculate residuals (to plot)
[calculate_residuals(df) for df in df_list]
Example #17
    data = gen_test(500)
    data.iloc[5:60, :] = np.nan
    x = np.ma.masked_where(np.isnan(data['x'].values),
                           data['x'].values).reshape(25, 20)
    y = np.ma.masked_where(np.isnan(data['y'].values),
                           data['y'].values).reshape(25, 20)
    c1 = np.ma.masked_where(np.isnan(data['c1'].values),
                            data['c1'].values).reshape(25, 20)
    c2 = np.ma.masked_where(np.isnan(data['c2'].values),
                            data['c2'].values).reshape(25, 20)

    # ----- DEBUG restore shape
    #x0, x, y0, y = partial_corr_tensor(x, y, [c1, c2])
    #print(x0.data, x.data)
    #print(y0.data, y.data)

    # ----- DEBUG the actual partial correlation
    which = 6
    stats = pg.partial_corr(data.iloc[which::20, :].dropna(axis=1, how='all'),
                            x='x',
                            y='y',
                            covar=['c1', 'c2'])
    print(stats)

    # Tensor-calculate the partial correlation
    r, p = partial_corr_tensor(x, y, [c1, c2])
    print(r[which])
    print(p[which])
    print(r)
    print(p)
Example #18
        for v in EV:
            fib = hf[hf.event == b]['log' + f].dropna()
            evar = hf[hf.event == b][v].reindex(fib.index)

            # Full Corr
            pcc = stats.pearsonr(fib, evar)
            temp.loc[v, (b, 'PCC_' + f)] = round(pcc[0], 2)  # full .loc key avoids chained assignment on a copy
            temp.loc[v, (b, 'p_' + f)] = pcc[1]

            if pcc[1] <= alpha:
                sig_list.append(v)

            # Partial
            confound = [x for x in EV if x != v]
            partial = pg.partial_corr(data=hf[hf.event == b],
                                      x='log' + f,
                                      y=v,
                                      covar=confound)
            temp.loc[v, (b, 'partial_' + f)] = round(partial['r'].iat[0], 2)
            temp.loc[v, (b, 'p_partial_' + f)] = partial['p-val'].iat[0]

            if partial['p-val'].iat[0] <= alpha:
                sig_list.append('*')

            # Semi-Partial
            confound = [x for x in EV if x != v]
            partial = pg.partial_corr(data=hf[hf.event == b],
                                      x='log' + f,
                                      y=v,
                                      x_covar=confound)
            temp.loc[v, (b, 'semi_' + f)] = round(partial['r'].iat[0], 2)
            temp.loc[v, (b, 'p_semi_' + f)] = partial['p-val'].iat[0]
Example #19
my_data_list = [
    get_sub_df_according2col_value(my_data, "winsize", ws) for ws in winsize
]

# data to calculate partial corr
my_data_list2analysis = [
    get_data_to_analysis(data, "deviation_score", alignment[indx_align_n],
                         "N_disk", "list_index", "colorcode",
                         "colorcode5levels") for data in my_data_list
]

# partial corr between deviation score and alignment scores
method = "pearson"
partial_corr_list = [
    pg.partial_corr(data,
                    x="deviation_score",
                    y=alignment[indx_align_n],
                    covar="N_disk",
                    method=method) for data in my_data_list2analysis
]

# see correlations
if cal_pearsonr:
    pearson_r = [
        stats.pearsonr(data["deviation_score"], data[alignment[indx_align_n]])
        for data in my_data_list2analysis
    ]
    pearson_r2 = [
        stats.pearsonr(data["N_disk"], data[alignment[indx_align_n]])
        for data in my_data_list2analysis
    ]
    pearson_r3 = [
Example #20
                mm = 0
                for ifmri in range(cons - 1):
                    for jfmri in range(cons - 1 - ifmri):
                        v2_fmriRDM[mm] = fmri_rdms[ifmri, ifmri + jfmri + 1]
                        mm = mm + 1
                #sort data to satisfy format
                data = pd.DataFrame({
                    'bhv_RDM': v1_bhvRDM,
                    'fmri_RDMs': v2_fmriRDM,
                    'Covariation': v3_bhvsentenceLRDM
                })
                #nan-->0
                dataxin = data.fillna(0)
                corrs = pg.partial_corr(dataxin,
                                        x='bhv_RDM',
                                        y='fmri_RDMs',
                                        covar='Covariation',
                                        method='spearman')
                corrs_result[i, j, k] = corrs[['r', 'p-val']]

                ##Do the Fisher-Z transform of the r
                Zcorrs_result[i, j, k] = corrs[['r', 'p-val']]
                Zcorrs_result[i, j, k, 0] = 0.5 * np.log(
                    (1 + corrs['r']) / (1 - corrs['r']))
    """*** sav RSA result ***"""
    # get the affine info
    affine = get_affine(mask_filename)
    # save the RSA result as a .nii file
    RSAresultfilename = ('%s/partialSpearmanRSAimg_%s.nii' %
                         (RSAresultpath, subID[sub][4:10]))
    #If img_background=None, the background will be ch2.nii.gz.
                          "list_index", "colorcode", "colorcode5levels")
     for df in df_list_beforegb
 ]
 # correlation parameters
 method = "pearson"
 x = "a_values"
 y = "deviation_score"
 covar = "N_disk"
 # corr: a values and numerosity
 corr_av_ndisc = list()
 corr_av_ndisc = [
     stats.pearsonr(sub_df[x], sub_df[covar]) for sub_df in df_list
 ]
 if parrtial_corr:
     partial_corr_res_list = [
         pg.partial_corr(df, x=x, y=y, covar=covar, method=method)
         for df in df_list
     ]
 else:
     corr_res_list = [
         stats.pearsonr(sub_df[x], sub_df[y]) for sub_df in df_list
     ]
 # %% normalization
 df_list_norm_deviation = [normalize_deviation(df) for df in df_list]
 df_list_norm_avs = [
     normalize_minusonetozero(df, to_normalize_col="a_values")
     for df in df_list
 ]
 # rename normed cols
 old_name_dev = "deviation_score"
 new_name_dev = "deviation_score_norm"
def get_partial_corr_df(indx_align_n = 0, w03 = winsize03, w04 = winsize04, w05 = winsize05, w06 = winsize06,
                        w07 = winsize07):
    """
    get one partial corr dataframe for given angle size, indicated by indx_align_n
    """
    w03 = get_data_to_analysis(w03, "deviation_score", alignment[indx_align_n], "N_disk", "list_index", "colorcode",
                               "colorcode5levels")
    w04 = get_data_to_analysis(w04, "deviation_score", alignment[indx_align_n], "N_disk", "list_index", "colorcode",
                               "colorcode5levels")
    w05 = get_data_to_analysis(w05, "deviation_score", alignment[indx_align_n], "N_disk", "list_index", "colorcode",
                               "colorcode5levels")
    w06 = get_data_to_analysis(w06, "deviation_score", alignment[indx_align_n], "N_disk", "list_index", "colorcode",
                               "colorcode5levels")
    w07 = get_data_to_analysis(w07, "deviation_score", alignment[indx_align_n], "N_disk", "list_index", "colorcode",
                               "colorcode5levels")

    method = "pearson"
    try:
        partial_corr_03 = pg.partial_corr(w03, x = "deviation_score", y = alignment[indx_align_n], covar = "N_disk",
                                          method = method)
    except Exception:
        partial_corr_03 = pd.DataFrame()

    try:
        partial_corr_04 = pg.partial_corr(w04, x = "deviation_score", y = alignment[indx_align_n], covar = "N_disk",
                                          method = method)
    except Exception:
        partial_corr_04 = pd.DataFrame()

    try:
        partial_corr_05 = pg.partial_corr(w05, x = "deviation_score", y = alignment[indx_align_n], covar = "N_disk",
                                          method = method)
    except Exception:
        partial_corr_05 = pd.DataFrame()
    partial_corr_06 = pg.partial_corr(w06, x = "deviation_score", y = alignment[indx_align_n], covar = "N_disk",
                                      method = method)
    partial_corr_07 = pg.partial_corr(w07, x = "deviation_score", y = alignment[indx_align_n], covar = "N_disk",
                                      method = method)

    # normalization
    w03_norm_deviation = normalize_deviation(w03)
    w04_norm_deviation = normalize_deviation(w04)
    w05_norm_deviation = normalize_deviation(w05)
    w06_norm_deviation = normalize_deviation(w06)
    w07_norm_deviation = normalize_deviation(w07)

    w03_norm_align_v = normalize_zerotoone(w03, to_normalize_col = alignment[indx_align_n])
    w04_norm_align_v = normalize_zerotoone(w04, to_normalize_col = alignment[indx_align_n])
    w05_norm_align_v = normalize_zerotoone(w05, to_normalize_col = alignment[indx_align_n])
    w06_norm_align_v = normalize_zerotoone(w06, to_normalize_col = alignment[indx_align_n])
    w07_norm_align_v = normalize_zerotoone(w07, to_normalize_col = alignment[indx_align_n])
    # rename normed cols
    old_name_dev = "deviation_score"
    new_name_dev = "deviation_score_norm"
    old_name_alig = alignment[indx_align_n]
    new_name_alig = alignment[indx_align_n] + "_norm"
    w03_norm_deviation = rename_norm_col(w03_norm_deviation, old_name_dev, new_name_dev)
    w04_norm_deviation = rename_norm_col(w04_norm_deviation, old_name_dev, new_name_dev)
    w05_norm_deviation = rename_norm_col(w05_norm_deviation, old_name_dev, new_name_dev)
    w06_norm_deviation = rename_norm_col(w06_norm_deviation, old_name_dev, new_name_dev)
    w07_norm_deviation = rename_norm_col(w07_norm_deviation, old_name_dev, new_name_dev)
    w03_norm_align_v = rename_norm_col(w03_norm_align_v, old_name_alig, new_name_alig)
    w04_norm_align_v = rename_norm_col(w04_norm_align_v, old_name_alig, new_name_alig)
    w05_norm_align_v = rename_norm_col(w05_norm_align_v, old_name_alig, new_name_alig)
    w06_norm_align_v = rename_norm_col(w06_norm_align_v, old_name_alig, new_name_alig)
    w07_norm_align_v = rename_norm_col(w07_norm_align_v, old_name_alig, new_name_alig)
    # concat orig dataframe with new normalized dataframe
    w03 = pd.concat([w03, w03_norm_deviation, w03_norm_align_v], axis = 1)
    w04 = pd.concat([w04, w04_norm_deviation, w04_norm_align_v], axis = 1)
    w05 = pd.concat([w05, w05_norm_deviation, w05_norm_align_v], axis = 1)
    w06 = pd.concat([w06, w06_norm_deviation, w06_norm_align_v], axis = 1)
    w07 = pd.concat([w07, w07_norm_deviation, w07_norm_align_v], axis = 1)
    # new data to calculate partial corr
    my_data_new = pd.concat([w03, w04, w05, w06, w07], axis = 0, sort = True)

    partial_corr_all = pg.partial_corr(my_data_new, x = "deviation_score", y = alignment[indx_align_n],
                                       covar = "N_disk",
                                       method = method)

    partial_corr = pd.concat(
            [partial_corr_03, partial_corr_04, partial_corr_05, partial_corr_06, partial_corr_07, partial_corr_all],
            axis = 0)
    return partial_corr
def SFC_regress_out_distance(structure_file_path, function_file_path,
                             electrode_localization_by_atlas_file_path,
                             outputfile):

    import pingouin as pg
    #Get functional connectivity data in pickle file format
    with open(function_file_path, 'rb') as f:
        broadband, alphatheta, beta, lowgamma, highgamma, electrode_row_and_column_names, order_of_matrices_in_pickle_file = pickle.load(
            f)
    FC_list = [broadband, alphatheta, beta, lowgamma, highgamma]

    # set up the dataframe of electrodes to analyze
    final_electrodes = pd.DataFrame(electrode_row_and_column_names,
                                    columns=['electrode_name'])
    final_electrodes = final_electrodes.reset_index()
    final_electrodes = final_electrodes.rename(columns={"index": "func_index"})

    #Get Structural Connectivity data in mat file format. Output from DSI studio
    structural_connectivity_array = np.array(
        pd.DataFrame(loadmat(structure_file_path)['connectivity']))

    #Get electrode localization by atlas csv file data. From get_electrode_localization.py
    electrode_localization_by_atlas = pd.read_csv(
        electrode_localization_by_atlas_file_path)

    # normalizing and log-scaling the structural matrices
    structural_connectivity_array[structural_connectivity_array == 0] = 1
    structural_connectivity_array = np.log10(
        structural_connectivity_array
    )  # log-scaling. Converting 0s to 1 to avoid taking log of zeros
    structural_connectivity_array = structural_connectivity_array / np.max(
        structural_connectivity_array)  # normalization

    #Only consider electrodes that are in both the localization and the pickle file
    final_electrodes = final_electrodes.merge(electrode_localization_by_atlas,
                                              on='electrode_name')
    # Remove electrodes in the Functional Connectivity matrices that have a region of 0
    final_electrodes = final_electrodes[final_electrodes['region_number'] != 0]
    for i in range(len(FC_list)):
        FC_list[i] = FC_list[i][final_electrodes['func_index'], :, :]
        FC_list[i] = FC_list[i][:, final_electrodes['func_index'], :]

    #Fisher z-transform of functional connectivity data, so that we can take means of correlations and correlate them against the structural connectivity
    #Fisher z-transform is just arctanh
    for i in range(len(FC_list)):
        FC_list[i] = np.arctanh(FC_list[i])

    # Remove structural ROIs not in electrode_localization ROIs
    electrode_ROIs = np.unique(np.array(final_electrodes.iloc[:, 5]))
    electrode_ROIs = electrode_ROIs[~(electrode_ROIs == 0)]  #remove region 0
    structural_index = electrode_ROIs - 1  #subtract 1 because of python's zero indexing
    structural_connectivity_array = structural_connectivity_array[
        structural_index, :]
    structural_connectivity_array = structural_connectivity_array[:,
                                                                  structural_index]

    #taking average functional connectivity for those electrodes in same atlas regions

    # produce the distance matrix to regress out
    dist_matrix = np.zeros(
        (final_electrodes.shape[0], final_electrodes.shape[0]))
    for i in range(0, final_electrodes.shape[0]):
        for j in range(0, final_electrodes.shape[0]):
            if (i != j):
                c_i = final_electrodes.iloc[i, 2:5]
                c_j = final_electrodes.iloc[j, 2:5]
                dist = np.sqrt((c_i[0] - c_j[0])**2 + (c_i[1] - c_j[1])**2 +
                               (c_i[2] - c_j[2])**2)
                dist_matrix[i, j] = dist
                dist_matrix[j, i] = dist

    for i in range(len(FC_list)):
        ROIs = np.array(final_electrodes.iloc[:, 5])
        for r in range(len(electrode_ROIs)):
            index_logical = (ROIs == electrode_ROIs[r])
            index_first = np.where(index_logical)[0][0]
            index_second_to_end = np.where(index_logical)[0][1:]
            mean = np.mean(FC_list[i][index_logical, :, :], axis=0)
            # add in code to average the distance in the regions
            # only need to modify distance once
            if (i == 0):
                mean_dist = np.mean(dist_matrix[index_logical, :], axis=0)
            # Fill in with mean.
            FC_list[i][index_first, :, :] = mean
            FC_list[i][:, index_first, :] = mean
            # fill in with mean distance
            if (i == 0):
                dist_matrix[index_first, :] = mean_dist
                dist_matrix[:, index_first] = mean_dist
            #delete the other rows and columns belonging to the same region.
            FC_list[i] = np.delete(FC_list[i], index_second_to_end, axis=0)
            FC_list[i] = np.delete(FC_list[i], index_second_to_end, axis=1)

            # delete the other rows and columns in the distance matrix
            if (i == 0):
                dist_matrix = np.delete(dist_matrix,
                                        index_second_to_end,
                                        axis=0)
                dist_matrix = np.delete(dist_matrix,
                                        index_second_to_end,
                                        axis=1)
            #keeping track of which electrode labels correspond to which rows and columns
            ROIs = np.delete(ROIs, index_second_to_end, axis=0)
        #remove electrodes in the ROI labeled as zero
        index_logical = (ROIs == 0)
        index = np.where(index_logical)[0]
        FC_list[i] = np.delete(FC_list[i], index, axis=0)
        FC_list[i] = np.delete(FC_list[i], index, axis=1)
        ROIs = np.delete(ROIs, index, axis=0)
        # remove electrodes in the ROI labeled as zero from distance
        dist_matrix = np.delete(dist_matrix, index, axis=0)
        dist_matrix = np.delete(dist_matrix, index, axis=1)

    #order FC matrices by ROIs
    order = np.argsort(ROIs)
    for i in range(len(FC_list)):
        FC_list[i] = FC_list[i][order, :, :]
        FC_list[i] = FC_list[i][:, order, :]

    # order the distance matrix by ROIs
    dist_matrix = dist_matrix[order, :]
    dist_matrix = dist_matrix[:, order]

    #undo the Fisher z-transform (tanh)
    for i in range(len(FC_list)):
        FC_list[i] = np.tanh(FC_list[i])

    #initialize correlation arrays
    Corrrelation_list = [None] * len(FC_list)
    for i in range(len(FC_list)):
        Corrrelation_list[i] = np.zeros([FC_list[0].shape[2]], dtype=float)

    correlation_type = 'spearman'
    #calculate Structure-Function Correlation.
    for i in range(len(FC_list)):
        for t in range(FC_list[i].shape[2] - 1):
            #Spearman rank correlation: functional and structural connectivity are non-normally distributed, so Spearman is preferred here
            to_corr_df = pd.DataFrame({
                'func': np.ndarray.flatten(FC_list[i][:, :, t]),
                'dist': np.ndarray.flatten(dist_matrix),
                'struc': np.ndarray.flatten(structural_connectivity_array)
            })
            if correlation_type == 'spearman':
                # previously: spearmanr(np.ndarray.flatten(FC_list[i][:, :, t]),
                #     np.ndarray.flatten(structural_connectivity_array)).correlation
                Corrrelation_list[i][t] = pg.partial_corr(
                    to_corr_df, x='func', y='struc', covar='dist',
                    method='spearman').iloc[0, 1]  # column 1 is 'r'
            # Pearson correlation: calculated because past studies use Pearson and we want to see if these results are comparable
            if correlation_type == 'pearson':
                Corrrelation_list[i][t] = pg.partial_corr(
                    to_corr_df, x='func', y='struc', covar='dist',
                    method='pearson').iloc[0, 1]

    order_of_matrices_in_pickle_file = pd.DataFrame(
        ["broadband", "alphatheta", "beta", "lowgamma", "highgamma"],
        columns=["Order of matrices in pickle file"])
    with open(outputfile, 'wb') as f:
        pickle.dump([
            Corrrelation_list[0], Corrrelation_list[1], Corrrelation_list[2],
            Corrrelation_list[3], Corrrelation_list[4],
            order_of_matrices_in_pickle_file
        ], f)
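
A closing note on the arctanh/tanh round trip in the function above: FC values are averaged within atlas regions, and means of raw correlation coefficients behave poorly near ±1, so the code averages in Fisher-z space and maps back with tanh. A small standalone illustration with arbitrary values:

import numpy as np

r_values = np.array([0.2, 0.5, 0.9])
naive_mean = r_values.mean()                        # ~0.533
fisher_mean = np.tanh(np.arctanh(r_values).mean())  # ~0.630, weights strong correlations more
print(naive_mean, fisher_mean)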