Exemplo n.º 1
0
def wilcoxon_test_err_and_dt(dt_exp=-19, err_exp=-13):
    """Paired Wilcoxon tests between the dt and err series of every field.

    For each variable in FIELDS_MAP, compares the dt data at exponent
    ``dt_exp`` against the err data at exponent ``err_exp``, column by
    column for columns 1..5, collecting the p-values.

    Returns a DataFrame of p-values (index 1..5, one column per variable).
    """
    pvalues = defaultdict(list)
    for field in FIELDS_MAP:
        dt_frame = get_svd('dt', 2, field)[dt_exp]
        err_frame = get_svd('err', 2, field)[err_exp]
        for col in range(1, 6):
            test_row = pg.wilcoxon(dt_frame[col], err_frame[col])
            pvalues[field].append(test_row['p-val'].values[0])
    return pd.DataFrame(pvalues, index=range(1, 6))
Exemplo n.º 2
0
def wilcoxon_test(err_or_dt, base, min_exp_bound=-float('inf'), ignore_exp=()):
    """Wilcoxon tests between consecutive step pairs, per field and column.

    For every variable in FIELDS_MAP, fetches the per-step data frames and
    tests each adjacent pair of steps on columns 1..5, accumulating the
    p-values keyed by the (step, next_step) pair.

    Returns a dict mapping each step pair to a DataFrame of p-values
    (index 1..5, one column per variable).
    """
    pair_results = defaultdict(lambda: defaultdict(list))
    for field in FIELDS_MAP:
        step_frames = get_svd(err_or_dt, base, field, min_exp_bound, ignore_exp)
        items = tuple(step_frames.items())
        # Walk adjacent (step, frame) pairs.
        for (step_a, frame_a), (step_b, frame_b) in zip(items, items[1:]):
            bucket = pair_results[(step_a, step_b)]
            for col in range(1, 6):
                test_row = pg.wilcoxon(frame_a[col], frame_b[col])
                bucket[field].append(test_row['p-val'].values[0])
    return {pair: pd.DataFrame(data, index=range(1, 6))
            for pair, data in pair_results.items()}
Exemplo n.º 3
0
    def nonparamsignificance(self, *arrays):
        """Compare two samples with three nonparametric tests.

        Runs Mann-Whitney U, Kruskal-Wallis H and Wilcoxon signed-rank
        tests on ``arrays[0]`` vs ``arrays[1]``, prints the five-number
        summaries of both samples plus the result table, and returns the
        table as a DataFrame.
        """
        from scipy.stats import mannwhitneyu, kruskal, wilcoxon
        import pingouin as pg

        self.d1 = arrays[0]
        self.d2 = arrays[1]

        def keterangan(pval):
            # Interpret the p-value at the conventional 0.05 level.
            alpha = 0.05
            if pval > alpha:
                return 'Same distribution (fail to reject H0)'
            else:
                return 'Different distribution (reject H0)'

        metode = []
        p = []
        stat = []
        ket = []

        # Each entry: (label, callable returning (statistic, p-value)).
        tests = [
            ('Mann-Whitney U test', lambda: mannwhitneyu(self.d1, self.d2)),
            ('Kruskal-Wallis H Test', lambda: kruskal(self.d1, self.d2)),
            ('Wilcoxon', lambda: wilcoxon(self.d1, self.d2, correction=True)),
        ]
        for name, run in tests:
            metode.append(name)
            a, b = run()
            stat.append(a)
            p.append(b)
            ket.append(keterangan(b))

        results = pd.DataFrame({
            'Method': metode,
            'Statistic': stat,
            'p-value': p,
            'Conclusion': ket,
        })
        print('5-Number of statistic for D1:')
        self.fivenumberplus(self.d1)

        print('5-Number of statistic for D2:')
        self.fivenumberplus(self.d2)

        print(results)
        # Fix: pingouin renamed the `tail` keyword to `alternative` in 0.4
        # (later removed); keep the two-sided test but use the supported
        # keyword, consistent with the other pg.wilcoxon calls in this file.
        print(pg.wilcoxon(self.d1, self.d2, alternative='two-sided'))
        return results
Exemplo n.º 4
0
    def perform_kappa_ttest(self):
        """Unpaired t-test and Wilcoxon between activity groups on kappa.

        Splits self.df_kappa_long into HIGH and LOW activity groups, runs
        pingouin's unpaired t-test (storing the table in self.kappa_ttest,
        with an approximate Hedges' g column added) and a Wilcoxon test
        (stored in self.kappa_wilcoxon), printing both tables.
        """
        print(
            "\nPerforming unpaired T-test between activity groups on Cohen's Kappa values."
        )

        by_group = self.df_kappa_long.groupby("Group")
        kappa_high = by_group.get_group("HIGH")["Kappa"]
        kappa_low = by_group.get_group("LOW")["Kappa"]

        self.kappa_ttest = pg.ttest(kappa_high, kappa_low,
                                    paired=False, correction='auto')

        # Approximate Hedges' g from Cohen's d: d * (1 - 3 / (4*(n1+n2) - 9)).
        # NOTE(review): total n is taken as 2 * len(HIGH), i.e. this assumes
        # equal group sizes — confirm against the data.
        total_n = 2 * kappa_high.shape[0]
        self.kappa_ttest["hedges-g"] = self.kappa_ttest["cohen-d"] * (
            1 - (3 / (4 * total_n - 9)))
        print(self.kappa_ttest)

        self.kappa_wilcoxon = pg.wilcoxon(kappa_high, kappa_low)
        print(self.kappa_wilcoxon)
Exemplo n.º 5
0
'''New November stuff'''
# Anterior hippocampus analysis: contrast gPPI connectivity between targets.
df = pd.read_csv('extracted_mem_apriori_gPPI.csv')

# Index by (target, seed, cope, subject) so the frame subtraction below
# aligns rows on the remaining levels.
df = df.set_index(['target', 'seed', 'cope', 'subject']).sort_index()
# Difference of the 'hc_head' target values minus the 'rACC' target values.
df = (df.loc['hc_head'] - df.loc['rACC'])
# df = (df.loc['CS+E'] - df.loc['CS+A'])
# Keep only the 'amyg_bla' seed for the CS+A and CS+E conditions.
df = df.loc[('amyg_bla', ['CS+A', 'CS+E']), ].reset_index()
# lgroup presumably maps a subject id to a group label — verify against caller.
df['group'] = df.subject.apply(lgroup)

fig, ax = plt.subplots()
sns.barplot(data=df,
            x='group',
            y='conn',
            hue='cope',
            palette=['darkmagenta', 'seagreen'],
            ax=ax)
ax.set_ylabel('vmPFC > Amyg. BLA connectivity')
ax.set_title('gPPI seed = Hc Head (anterior)')

# Wilcoxon: CS+E vs CS+A connectivity within the 'healthy' group.
# NOTE(review): chained boolean masks rely on index alignment between the
# two selections — confirm both series pair up by subject.
pg.wilcoxon(df.conn[df.cope == 'CS+E'][df.group == 'healthy'],
            df.conn[df.cope == 'CS+A'][df.group == 'healthy'])

# Reload the raw table and test sgACC vs amyg_bla targets for CS+E in
# healthy subjects (hc_head seed).
stats = pd.read_csv('extracted_mem_apriori_gPPI.csv')
stats['group'] = stats.subject.apply(lgroup)
stats = stats.set_index(['target', 'seed', 'cope', 'group', 'subject'])
pg.wilcoxon(stats.loc[('sgACC', 'hc_head', 'CS+E', 'healthy'), 'conn'],
            stats.loc[('amyg_bla', 'hc_head', 'CS+E', 'healthy'), 'conn'])

# Connectivity values for both targets and both conditions, for later use.
out = stats.loc[(['sgACC', 'amyg_bla'], 'hc_head', ['CS+E', 'CS+A']), 'conn']
Exemplo n.º 6
0
    def wilcoxon_test(self,
                      value_col,
                      group_col,
                      condition=False,
                      baseline=None,
                      display_result=True,
                      file=None,
                      display_info=True):
        """Pairwise Wilcoxon tests over the groups of ``group_col``.

        Parameters
        ----------
        value_col : column holding the values to compare.
        group_col : column holding the group labels.
        condition : optional row filter, interpreted by self.__get_condition.
        baseline : if given, every other group is compared only against this
            group and the result table has one column per non-baseline group;
            otherwise all group pairs are compared (long format).
        display_result : display the result table.
        file : optional CSV path the table is written to.
        display_info : print a header (and the condition) before the table.

        Returns
        -------
        pandas.DataFrame with the test results.
        """
        # collect data
        df = self.__get_condition(self.df, condition)

        # collect group values to compare
        groups = self.__ordered_values(group_col)

        # baseline gets special treatment: move it to the end so that
        # groups[:-1] below is exactly the non-baseline groups
        if baseline is not None:
            groups = [x for x in groups if x != baseline]
            groups.append(baseline)

        # setup dict to construct dataframe
        if baseline is None:
            results = {
                'A': [],
                'B': [],
                'W': [],
                'p': [],
                'bonf': [],
                'RBC': [],
                'CLES': []
            }
        else:
            results = {group: [] for group in groups}

        # collect all unordered pairs of distinct groups
        to_compare = []
        for g1 in groups:
            for g2 in groups:
                if g1 != g2 and (g2, g1) not in to_compare:
                    to_compare.append((g1, g2))

        def run_test(g1, g2, n_comparisons):
            # Run one Wilcoxon test and unpack the pingouin result row,
            # Bonferroni-correcting the p-value over n_comparisons tests.
            s1 = df[df[group_col] == g1][value_col]
            s2 = df[df[group_col] == g2][value_col]
            stats = wilcoxon(s1, s2)
            W = stats['W-val'].values[0]
            p = stats['p-val'].values[0]
            bonf = self.__apply_bonferroni(p, n_comparisons)
            rbc = stats['RBC'].values[0]
            cles = stats['CLES'].values[0]
            return W, p, bonf, rbc, cles

        # compute results
        if baseline is None:
            # compare all groups to each other
            for (g1, g2) in to_compare:
                W, p, bonf, rbc, cles = run_test(g1, g2, len(to_compare))
                results['A'].append(g1)
                results['B'].append(g2)
                results['W'].append(W)
                results['p'].append(self.__check_p(p))
                results['bonf'].append(self.__check_p(bonf))
                results['RBC'].append(round(rbc, 5))
                results['CLES'].append(round(cles, 5))

            # create dataframe
            df_res = pd.DataFrame(results)

        else:
            # only compare to baseline; since baseline was appended last,
            # it is always the second element of any pair containing it
            for (g1, g2) in to_compare:
                if g2 != baseline:
                    continue
                W, p, bonf, rbc, cles = run_test(g1, g2, len(groups) - 1)
                results[g1].append(self.__check_p(p))
                results[g1].append(self.__check_p(bonf))
                results[g1].append(W)
                results[g1].append(round(rbc, 5))

            df_res = pd.DataFrame(results,
                                  index=pd.Index(['p', 'bonf', 'W', 'r'],
                                                 name='value'),
                                  columns=pd.Index(groups[:-1], name='group'))

        if display_result:
            if display_info:
                print("################")
                print("### Wilcoxon ###")
                print("################")
                if condition is not False:
                    print(self.__condition_to_string(condition))
            display(df_res)
            print("")

        if file is not None:
            df_res.to_csv(file)

        # Fix: the computed table was previously discarded (implicit None
        # return); return it so callers can use the results programmatically.
        return df_res
Exemplo n.º 7
0
def test_posthoc(df, dep_var, ind_vars, is_non_normal=None):
    """Pairwise post-hoc tests of ``dep_var`` between all condition pairs.

    For every unordered pair of values in ``ind_vars`` (matched against the
    'Condition number' column), runs a Wilcoxon test when the data are
    non-normal, otherwise a t-test (paired when the two samples have equal
    length), and prints a significance-starred summary line per pair.

    If ``is_non_normal`` is None, it is decided by test_normality at p < 0.01.
    """
    ind_vars = sorted(ind_vars)

    if is_non_normal is None:
        normality_p = test_normality(df, dep_var, ind_vars)
        significants = [p for p in normality_p if p < 0.01]
        is_non_normal = len(significants) > 0

    # Collect all unordered pairs of conditions.
    iv_combinations = []
    for iv in ind_vars:
        for iv1 in ind_vars:
            if iv != iv1 and (iv, iv1) not in iv_combinations and (
                    iv1, iv) not in iv_combinations:
                iv_combinations.append((iv, iv1))

    for comb in iv_combinations:
        x = df.loc[df['Condition number'] == comb[0]][dep_var]
        y = df.loc[df['Condition number'] == comb[1]][dep_var]

        try:
            if is_non_normal:
                results = pg.wilcoxon(x, y, alternative='two-sided').round(4)
                stat = list(results['W-val'])[0]
                p = list(results['p-val'])[0]
                prefix = _significance_prefix(p)
                print(
                    f'{prefix}{comb} Wilco: W={round(stat, 2)}, p={round(p, 3)}')
            else:
                # Paired only when the two samples line up one-to-one.
                results = pg.ttest(x,
                                   y,
                                   paired=len(x) == len(y),
                                   alternative='two-sided').round(4)
                stat = list(results['T'])[0]
                p = list(results['p-val'])[0]
                prefix = _significance_prefix(p)
                print(
                    f'{prefix}{comb} Ttest: t={round(stat, 2)}, p={round(p, 3)}')

        except Exception as e:
            # Best-effort: report and continue with the remaining pairs.
            print(f'Error in {comb}: {e}')

    return


def _significance_prefix(p):
    """Return the conventional significance stars for p, padded to 3 chars."""
    if p < .001:
        return '***'
    if p < .01:
        return '** '
    if p < .05:
        return '*  '
    return '   '
Exemplo n.º 8
0
    def qualOrdinalPaired(imgDir, sheetName, sheetDf, sheetScale, silent=True):
        """Analyze paired ordinal ratings: Friedman omnibus + pairwise Wilcoxon.

        Builds a rating-by-modality contingency table (zero-filling missing
        scale steps 0..sheetScale-1), runs a Friedman test across all
        modalities when there are more than two, Wilcoxon signed-rank tests
        on every modality pair with Holm-corrected p-values, and renders a
        stacked bar plot with the statistics to ``imgDir/sheetName.png``.
        """
        if not silent:
            print("######################################## ", sheetName,
                  " ########################################")
        meltedSheetDf = sheetDf.melt(var_name='Factor', value_name='Variable')
        contingencySheetDf = pd.crosstab(index=meltedSheetDf['Variable'],
                                         columns=meltedSheetDf['Factor'])
        # Fill empty scale values so every step appears in the plot.
        for sheetStep in range(sheetScale):
            if sheetStep not in contingencySheetDf.index.values:
                contingencySheetDf.loc[sheetStep] = [
                    0 for x in range(len(contingencySheetDf.columns.values))
                ]
        contingencySheetDf.sort_index(inplace=True)

        # Fix: DataFrame.append was removed in pandas 2.0 — accumulate the
        # result rows in a list and build the frame once at the end.
        stat_rows = []

        # ALL MODALITY: Friedman omnibus test when more than two modalities.
        if len(contingencySheetDf.columns) > 2:
            sheetDf_long = sheetDf.melt(ignore_index=False).reset_index()
            friedman_stats = pg.friedman(data=sheetDf_long,
                                         dv="value",
                                         within="variable",
                                         subject="index")
            source, wvalue, ddof1, qvalue, pvalue = friedman_stats.values[0]
            stat_rows.append({
                'COMPARISON': 'ALL',
                'TEST': "Friedman",
                'STATISTICS': qvalue,
                'P-VALUE': pvalue,
                'EFFECT SIZE': wvalue
            })

        # BETWEEN MODALITY: Wilcoxon test on every pair of modalities.
        modality_names = sheetDf.columns.values
        uncorrectedStatIndex = len(stat_rows)
        for i in range(len(modality_names)):
            for j in range(i + 1, len(modality_names)):
                stats_wilcoxon = pg.wilcoxon(sheetDf.loc[:, modality_names[i]],
                                             sheetDf.loc[:, modality_names[j]],
                                             correction=False,
                                             alternative='two-sided')
                wvalue, alternative, pvalue, RBC, CLES = stats_wilcoxon.values[
                    0]
                stat_rows.append({
                    'COMPARISON':
                    modality_names[i] + '|' + modality_names[j],
                    'TEST': "Wilcoxon",
                    'STATISTICS': wvalue,
                    'P-VALUE': pvalue,
                    'EFFECT SIZE': RBC
                })

        statDf = pd.DataFrame(stat_rows, columns=[
            'COMPARISON', 'TEST', 'STATISTICS', 'P-VALUE', 'EFFECT SIZE'
        ])
        # Holm-correct only the pairwise Wilcoxon p-values; the Friedman
        # row, if present, stays uncorrected.
        reject, statDf.loc[uncorrectedStatIndex::, 'P-VALUE'] = pg.multicomp(
            statDf.loc[uncorrectedStatIndex::, 'P-VALUE'].values,
            alpha=0.05,
            method="holm")

        StackedBarPlotter.StackedBarPlotter(filename=imgDir + '/' + sheetName +
                                            '.png',
                                            title=sheetName,
                                            dataDf=sheetDf,
                                            histDf=contingencySheetDf,
                                            statDf=statDf)