Exemplo n.º 1
0
def permtest_ANOVA_paired(data_panda, behavMeasure, Conds, reps):
    """Permutation test on one F value of a repeated-measures ANOVA.

    The dependent variable is shuffled within each subject ``reps`` times;
    the ANOVA is refitted on every shuffle to build a null distribution for
    the F value in the third row of the ANOVA table (the interaction term of
    a two-factor design, per the original author's comments).

    Parameters
    ----------
    data_panda : pandas.DataFrame
        Long-format data with a ``'Subject_ID'`` column, the column named by
        ``behavMeasure`` and the factor columns listed in ``Conds``.
    behavMeasure : str
        Name of the dependent-variable column.
    Conds : list of str
        Within-subject factor columns passed to ``AnovaRM``.
    reps : int
        Number of permutations.

    Returns
    -------
    tuple
        ``(obs_stat, prob)``: the observed F value and the fraction of
        permuted F values strictly greater than it.

    Side effects: prints progress and results, and shows a histogram of the
    permuted F values.
    """
    # Observed statistic. ``.iloc`` is used because the F-value Series is
    # indexed by term name; integer keys on such a Series are deprecated.
    observed_fit = AnovaRM(data_panda, behavMeasure, 'Subject_ID',
                           within=Conds).fit()
    obs_stat = observed_fit.anova_table['F Value'].iloc[2]

    # Work on a copy so the caller's frame does not gain the helper column.
    shuffled_panda = data_panda.copy()

    rand_vals = []
    for ii in range(reps):
        print('\r{} of {}'.format(ii, reps), end='')

        # Shuffle the measure across conditions, but only within a subject.
        shuffled_panda["behavMeasure_shuffled"] = shuffled_panda.groupby(
            "Subject_ID")[behavMeasure].transform(np.random.permutation)

        shuffled_fit = AnovaRM(shuffled_panda,
                               "behavMeasure_shuffled",
                               'Subject_ID',
                               within=Conds).fit()
        rand_vals.append(shuffled_fit.anova_table['F Value'].iloc[2])

    rand_vals = np.array(rand_vals)

    # One-sided (upper tail) p-value: share of permuted F values that exceed
    # the observed one.
    prob = np.mean(rand_vals > obs_stat)

    _ = plt.hist(rand_vals,
                 bins='auto')  # arguments are passed to np.histogram
    plt.show()

    print(f'p = {prob}')
    print(f'obs_stat = {obs_stat}')

    return obs_stat, prob
Exemplo n.º 2
0
def test_repeated_measures_aggregate_func():
    """Check AnovaRM aggregation on data with duplicated observations.

    Without ``aggregate_func`` the duplicated frame must raise ValueError;
    with mean or median aggregation the two models keep distinct aggregate
    functions but produce equal ANOVA tables.
    """
    import pandas as pd

    # DataFrame.append was removed in pandas 2.0; pd.concat builds the same
    # frame with every observation duplicated.
    double_data = pd.concat([data, data], axis=0)

    assert_raises(ValueError, AnovaRM, double_data, 'DV', 'id',
                  within=['A', 'B', 'D'])

    m1 = AnovaRM(double_data, 'DV', 'id', within=['A', 'B', 'D'],
                 aggregate_func=np.mean)
    m2 = AnovaRM(double_data, 'DV', 'id', within=['A', 'B', 'D'],
                 aggregate_func=np.median)

    # assert_equal on the two different callables must itself fail.
    assert_raises(AssertionError, assert_equal,
                  m1.aggregate_func, m2.aggregate_func)
    assert_frame_equal(m1.fit().anova_table, m2.fit().anova_table)
Exemplo n.º 3
0
def three_sample_test(sample1, sample2, sample3, test):
    """Run a three-sample significance test.

    Parameters
    ----------
    sample1, sample2, sample3 : sequence of numbers
        The three samples. For ``"rm-anova"`` the i-th elements of the three
        samples are treated as repeated measures of subject ``i``.
    test : str
        ``"anova"`` (parametric, between-subjects), ``"rm-anova"``
        (parametric, within-subjects), ``"kruskal-wallis"`` (nonparametric,
        between-subjects) or ``"friedman"`` (nonparametric, within-subjects).

    Returns
    -------
    tuple
        ``(test_stat, p_val)``.

    Raises
    ------
    ValueError
        If ``test`` is not one of the supported names, or if the samples
        have different lengths for ``"rm-anova"``.
    """
    if test == "anova":
        test_stat, p_val = scipy.stats.f_oneway(sample1, sample2, sample3)
    elif test == "rm-anova":
        if not (len(sample1) == len(sample2) == len(sample3)):
            raise ValueError(
                "rm-anova requires samples of equal length, got "
                f"{len(sample1)}, {len(sample2)} and {len(sample3)}")
        # Long format: one row per (subject, group) observation.
        rows = [
            (value, subject, group)
            for subject, triple in enumerate(zip(sample1, sample2, sample3))
            for group, value in zip("ABC", triple)
        ]
        df = pd.DataFrame(rows, columns=["response", "id", "group"])
        res = AnovaRM(df, depvar="response", subject="id",
                      within=["group"]).fit()
        # The table is indexed by term name, so select the row by position.
        test_stat = res.anova_table['F Value'].iloc[0]
        p_val = res.anova_table['Pr > F'].iloc[0]
    elif test == "kruskal-wallis":
        test_stat, p_val = scipy.stats.kruskal(sample1, sample2, sample3)
    elif test == "friedman":
        test_stat, p_val = scipy.stats.friedmanchisquare(
            sample1, sample2, sample3)
    else:
        raise ValueError(
            f"unknown test {test!r}; expected 'anova', 'rm-anova', "
            "'kruskal-wallis' or 'friedman'")
    return test_stat, p_val
Exemplo n.º 4
0
def rm_one_way_anova(dataset: Dataset, design, combined_data: CombinedData):
    """Fit a repeated-measures ANOVA on the dataset's within-subject factors.

    Explanatory variables whose name equals ``design["within subjects"]``
    become the within factors; the single explained variable is the
    dependent variable and ``dataset.pid_col_name`` identifies subjects.
    The fitted result is not returned.
    """
    frame = dataset.data
    explanatory = combined_data.get_explanatory_variables()
    explained = combined_data.get_explained_variables()

    assert len(explained) == 1
    outcome = explained[0]

    between_subjs = []
    within_subjs = []
    for variable in explanatory:
        if "between subjects" in design:
            if design["between subjects"] == variable.metadata[name]:
                between_subjs.append(variable.metadata[name])
        if "within subjects" in design:
            if design["within subjects"] == variable.metadata[name]:
                within_subjs.append(variable.metadata[name])

    # Between-subject factors are collected but not passed on: the original
    # author noted that statsmodels does not implement them.
    subject_column = dataset.pid_col_name
    model = AnovaRM(frame,
                    depvar=outcome.metadata[name],
                    subject=subject_column,
                    within=within_subjs)
    res2way = model.fit()
Exemplo n.º 5
0
def continuous_paired_group_repeated_measures_anova(**kwargs):
    """Run a one-factor repeated-measures ANOVA over combined conditions.

    Keyword arguments
    -----------------
    data_frame : pandas.DataFrame
        Must contain a ``'test_index'`` column (used as subject id), the
        dependent variable and the condition column(s). A ``'condition'``
        column is added to this frame in place.
    dependable_variable : str
        Name of the dependent-variable column.
    conditions : str or list of str
        Condition column(s). Several columns are merged into one factor by
        joining their values with ``'_'``.

    Returns
    -------
    tuple
        Always ``(True, 100)``; the fitted result is only printed.

    Raises
    ------
    ValueError
        If ``conditions`` is an empty list.
    """
    data_frame = kwargs["data_frame"]
    dependable_variable = kwargs["dependable_variable"]
    conditions = kwargs["conditions"]

    # AnovaRM is given a single within factor, so collapse the condition
    # columns into one. Previously a single condition left this column
    # filled with uninitialised values from np.empty.
    if isinstance(conditions, list):
        if not conditions:
            raise ValueError("conditions must name at least one column")
        if len(conditions) > 1:
            combined = data_frame[conditions].astype(str).agg("_".join,
                                                              axis=1)
        else:
            combined = data_frame[conditions[0]]
    else:
        combined = data_frame[conditions]
    data_frame["condition"] = combined

    # todo: list in conditions not supported, mapping to a single condition
    # is required; subject count is reduced otherwise
    aovrm = AnovaRM(data_frame[data_frame["test_index"] < 1000],
                    dependable_variable,
                    'test_index',
                    within=["condition"],
                    aggregate_func=np.mean)
    res = aovrm.fit()

    print(res)
    # todo: how to read pvalue res.summary()...
    return True, 100
Exemplo n.º 6
0
    def rm(self, data, dep_var, subject, within, aggregate_func=None):
        """
        Fit a repeated-measures ANOVA and return its table.

        Parameters:
        ----------
        data: DataFrame
            Holds the dependent variable, the subject identifier and the
            within-subject factor column(s).
        dep_var: str
            Column name of the dependent variable.
        subject: str
            Column name of the subject identifier.
        within: a list of strings
            Column names of the within-subject factors (at least one).
        aggregate_func: optional
            Forwarded unchanged to ``AnovaRM``.

        Return:
        ------
        aov_table: DataFrame
            The ``anova_table`` of the fitted model.
        """
        model = AnovaRM(data=data,
                        depvar=dep_var,
                        subject=subject,
                        within=within,
                        aggregate_func=aggregate_func)
        fitted = model.fit()
        return fitted.anova_table
Exemplo n.º 7
0
def anova(diff1, diff2, recall, within_factors):
    """Run a repeated-measures ANOVA on two sets of performance differences.

    Both inputs are reshaped with ``rearange(..., 'short', ...)``, stacked,
    and analysed with ``'performance'`` as dependent variable and
    ``'sub_id'`` as subject, averaging repeated cells. The result is printed.

    Parameters
    ----------
    diff1, diff2 : inputs accepted by ``rearange``.
    recall : bool
        Forwarded to ``rearange``; also selects the printed label
        (``'recall'`` when truthy, otherwise ``'recognition'``).
    within_factors : list of str
        Within-subject factor columns.

    Returns
    -------
    tuple
        ``(diff1, diff2, diffs_for_anova)``: the two reshaped inputs and
        their concatenation.
    """
    r = 'recall' if recall else 'recognition'

    diff1 = rearange(diff1, 'short', within_factors=within_factors,
                     recall=recall)
    diff2 = rearange(diff2, 'short', within_factors=within_factors,
                     recall=recall)

    diffs_for_anova = pd.concat([diff1, diff2])

    anovarm = AnovaRM(diffs_for_anova, 'performance', 'sub_id',
                      within=within_factors, aggregate_func='mean')
    res = anovarm.fit()

    # The rounded p-value that used to be computed here was never used and
    # relied on a deprecated integer lookup, so it has been removed.
    print(F'ANOVA ON DIFFERENCES in memory performance - {r}', res)

    return diff1, diff2, diffs_for_anova
Exemplo n.º 8
0
def rm_one_way(xs, y, key, df):
    """Return the printable summary of a repeated-measures ANOVA.

    ``y`` is the dependent variable, ``key`` the subject column and ``xs``
    the within-subject factors of ``df``; repeated cells are averaged.
    """
    fitted = AnovaRM(df,
                     depvar=y,
                     subject=key,
                     within=xs,
                     aggregate_func='mean').fit()
    return str(fitted)
Exemplo n.º 9
0
def rm_one_way(xs, y, key, df):
    """Fit a repeated-measures ANOVA and return its summary as text.

    Parameters
    ----------
    xs : list of str
        Within-subject factor columns.
    y : str
        Dependent-variable column.
    key : str
        Subject identifier column.
    df : pandas.DataFrame
        Long-format data.

    Returns
    -------
    str
        ``str()`` of the fitted AnovaRM result.
    """
    # Repeated observations in a cell are averaged before fitting.
    aovrm2way = AnovaRM(df, depvar=y, subject=key, within=xs,
                        aggregate_func='mean')
    res2way = aovrm2way.fit()
    return str(res2way)
Exemplo n.º 10
0
def AnovaRM_with_post_hoc(data, dep_var, subject, within, only_significant = False):
    """Print a repeated-measures ANOVA, then run paired t-test post-hocs.

    The ANOVA result is printed; ``pairwise_ttest_rel`` is then called with
    the same data, dependent variable and within factor(s). Nothing is
    returned.
    """
    fitted = AnovaRM(data, dep_var, subject, within).fit()
    print(fitted)

    # Post-hoc comparisons are delegated to pairwise_ttest_rel.
    post_hoc_options = {'within': within, 'only_significant': only_significant}
    pairwise_ttest_rel(data, dep_var, **post_hoc_options)
Exemplo n.º 11
0
def rm_one_way_anova(dataset: Dataset, predictions, design,
                     combined_data: CombinedData):
    """Run a repeated-measures ANOVA and wrap the outcome in a TestResult.

    Explanatory variables whose name equals ``design["within subjects"]``
    are used as within factors. The reported statistic, p-value and degrees
    of freedom are read from the ANOVA-table row named after the last
    explanatory variable.

    Parameters
    ----------
    dataset : Dataset
        Provides ``data`` and ``pid_col_name`` (subject column).
    predictions : sequence or falsy
        The first prediction (or the first item of a nested first element)
        is attached to the result; ``None`` when empty.
    design : mapping
        May contain ``"between subjects"`` and ``"within subjects"`` keys.
    combined_data : CombinedData
        Supplies the explanatory/explained variables and ``alpha``.

    Returns
    -------
    TestResult

    Raises
    ------
    ValueError
        If there is no explanatory variable, or the ANOVA table has no row
        for the reported variable.
    """
    data = dataset.data
    xs = combined_data.get_explanatory_variables()
    ys = combined_data.get_explained_variables()

    assert (len(ys) == 1)
    y = ys[0]
    between_subjs = []
    within_subjs = []
    # x is deliberately kept after the loop: the last explanatory variable
    # is the one whose effect is reported.
    x = None
    for x in xs:
        if "between subjects" in design and design[
                "between subjects"] == x.metadata[name]:
            between_subjs.append(x.metadata[name])
        if "within subjects" in design and design[
                "within subjects"] == x.metadata[name]:
            within_subjs.append(x.metadata[name])
    if x is None:
        raise ValueError("rm_one_way_anova needs at least one explanatory "
                         "variable")

    if predictions:
        if isinstance(predictions[0], list):
            prediction = predictions[0][0]
        else:
            prediction = predictions[0]
    else:
        prediction = None

    key = dataset.pid_col_name
    # Between-subject factors are not passed: the original author noted
    # that statsmodels does not implement them.
    aovrm2way = AnovaRM(data,
                        depvar=y.metadata[name],
                        subject=key,
                        within=within_subjs,
                        aggregate_func='mean')
    res2way = aovrm2way.fit()
    result_df = res2way.anova_table

    col_name = x.metadata[name]
    if col_name not in result_df.index:
        # Previously this fell through to an UnboundLocalError below.
        raise ValueError(
            f"ANOVA table has no row for {col_name!r}; available rows: "
            f"{list(result_df.index)}")
    row_data = result_df.loc[col_name]
    test_statistic = row_data['F Value']
    p_val = row_data['Pr > F']
    dof = (row_data['Num DF'], row_data['Den DF'])

    test_result = TestResult(name=rm_one_way_anova_name,
                             test_statistic=test_statistic,
                             p_value=p_val,
                             prediction=prediction,
                             dof=dof,
                             alpha=combined_data.alpha,
                             table=result_df,
                             x=x,
                             y=y)

    return test_result
Exemplo n.º 12
0
def rm_anova(data=None, subject=None, within=None, between=None, dv=None):
    """
    Fit a repeated-measures ANOVA and return its ANOVA table (a dataframe).

    ``dv`` is passed to AnovaRM as ``depvar``; every other argument is
    forwarded under its own name.
    """
    model = AnovaRM(data=data,
                    depvar=dv,
                    subject=subject,
                    within=within,
                    between=between)
    return model.fit().anova_table
Exemplo n.º 13
0
def getRMAnova(dataSet, labels, verbose=False):
    """Run a one-factor repeated-measures ANOVA over several samples.

    ``dataSet`` is a sequence of samples and ``labels[i]`` names sample
    ``i``. Within each sample the position of a value is used as its subject
    id. The fitted result is returned; its summary is printed when
    ``verbose`` is true.
    """
    # One label per observation, in the same order as the flattened data.
    cond_column = np.concatenate(
        [[labels[pos] for _ in sample] for pos, sample in enumerate(dataSet)])
    rt_column = np.concatenate(dataSet)
    id_column = np.concatenate([np.arange(len(sample)) for sample in dataSet])

    df = pd.DataFrame({'id': id_column, 'rt': rt_column, 'cond': cond_column})
    res = AnovaRM(df, 'rt', 'id', within=['cond']).fit()
    if verbose:
        print(res.summary())
    return res
Exemplo n.º 14
0
def rlrlRMANOVA(mes):
    """Print repeated-measures ANOVAs of three measures across models.

    ``mes`` is expanded with ``expandEvals``; the row index plus one is used
    as subject id (``'s_id'``) and ``'model'`` as the within factor. One
    ANOVA is printed for each of ``'error'``, ``'rise_time'`` and
    ``'energy'``. Nothing is returned.
    """
    # RL-RL ANOVA RM
    aexps = expandEvals(mes)
    aexps['s_id'] = (np.array(aexps.index.values.tolist()) + 1).tolist()

    # Each banner names the measure being analysed; previously all three
    # banners said "Error".
    measures = (
        ('error', 'Error'),
        ('rise_time', 'Rise Time'),
        ('energy', 'Energy'),
    )
    for column, title in measures:
        print(f'********** RL Controller {title} RMANOVA **********')
        avrm = AnovaRM(aexps, column, 's_id', within=['model'])
        print(avrm.fit())
Exemplo n.º 15
0
def test_repeated_measures_aggregate_func():
    """Check AnovaRM aggregation on data with duplicated observations.

    Without ``aggregate_func`` the duplicated frame must raise ValueError;
    with mean or median aggregation the two models keep distinct aggregate
    functions but produce equal ANOVA tables.
    """
    import pandas as pd

    # DataFrame.append was removed in pandas 2.0; pd.concat builds the same
    # frame with every observation duplicated.
    double_data = pd.concat([data, data], axis=0)

    assert_raises(ValueError,
                  AnovaRM,
                  double_data,
                  'DV',
                  'id',
                  within=['A', 'B', 'D'])

    m1 = AnovaRM(double_data,
                 'DV',
                 'id',
                 within=['A', 'B', 'D'],
                 aggregate_func=np.mean)
    m2 = AnovaRM(double_data,
                 'DV',
                 'id',
                 within=['A', 'B', 'D'],
                 aggregate_func=np.median)

    # assert_equal on the two different callables must itself fail.
    assert_raises(AssertionError, assert_equal, m1.aggregate_func,
                  m2.aggregate_func)
    assert_frame_equal(m1.fit().anova_table, m2.fit().anova_table)
Exemplo n.º 16
0
def rank_multiple_normal_homoscedastic(data, alpha, verbose, order,
                                       effect_size, force_mode):
    """
    Compare populations with a repeated-measures ANOVA followed by Tukey HSD.

    ``data`` is a wide frame (one column per population). It is stacked into
    long format for the ANOVA and the Tukey test; the per-population means
    and Tukey half-widths are written to the ``ci_lower``/``ci_upper``
    columns of the result frame.
    """
    column_names = {'level_0': 'id', 'level_1': 'treatment', 0: 'result'}
    long_form = data.stack().reset_index().rename(columns=column_names)

    rm_fit = AnovaRM(long_form, 'result', 'id', within=['treatment']).fit()
    pval = rm_fit.anova_table['Pr > F'].iat[0]

    if verbose and pval >= alpha:
        print(
            "Fail to reject null hypothesis that there is no difference between the distributions (p=%f)"
            % pval)
    elif verbose:
        print(
            "Rejecting null hypothesis that there is no difference between the distributions (p=%f)"
            % pval)
        print(
            "Using Tukey HSD post hoc test.",
            "Differences are significant if the confidence intervals of the mean values are not overlapping."
        )

    tukey_res = MultiComparison(long_form['result'],
                                long_form['treatment']).tukeyhsd()
    # The half-widths only exist once the plot has been created, so draw it
    # and close it again without showing it.
    tukey_res.plot_simultaneous()
    plt.close()

    rankdf, effsize_method, reorder_pos = _create_result_df_skeleton(
        data, None, True, order, effect_size=effect_size,
        force_mode=force_mode)

    for population in rankdf.index:
        centre = data.loc[:, population].mean()
        half_width = tukey_res.halfwidths[data.columns.get_loc(population)]
        rankdf.at[population, 'ci_lower'] = centre - half_width
        rankdf.at[population, 'ci_upper'] = centre + half_width

    return _ComparisonResult(rankdf, pval, None, 'anova', 'tukeyhsd',
                             effsize_method, reorder_pos)
Exemplo n.º 17
0
def test_repeated_measures_aggregate_func():
    """AnovaRM needs an aggregate function when cells hold several rows.

    The data are duplicated so that each cell has two observations: fitting
    without ``aggregate_func`` must raise, and mean/median aggregation must
    give equal ANOVA tables while keeping different aggregate functions.
    """
    doubled = pd.concat([data, data], axis=0)

    def build(**extra):
        # Every model gets its own factor list.
        return AnovaRM(doubled, 'DV', 'id', within=['A', 'B', 'D'], **extra)

    assert_raises(ValueError, AnovaRM, doubled, 'DV', 'id',
                  within=['A', 'B', 'D'])

    mean_model = build(aggregate_func=np.mean)
    median_model = build(aggregate_func=np.median)

    assert_raises(AssertionError, assert_equal,
                  mean_model.aggregate_func, median_model.aggregate_func)
    assert_frame_equal(mean_model.fit().anova_table,
                       median_model.fit().anova_table)
Exemplo n.º 18
0
    def anovaRM(self, depvar, subject, within=None, between=None, aggregate_func=None):
        """
        Fit a repeated-measures ANOVA on the instance's data and print it.

        Thin wrapper around ``AnovaRM`` applied to ``self.__data``. The notes
        below restate the limitations documented for that class: only fully
        balanced within-subject designs are supported, and between-subject
        effects and sphericity corrections are not implemented.

        Parameters
        ----------
        depvar: str
            The dependent variable in data
        subject: str
            Specify the subject id
        within: list[str]
            The within-subject factors
        between: list[str]
            The between-subject factors, this is not yet implemented
        aggregate_func: {None, 'mean', callable}
            If the data set contains more than a single observation per
            subject and cell of the specified model, this function will be
            used to aggregate the data before running the Anova. None (the
            default) will not perform any aggregation; 'mean' is a shortcut
            to numpy.mean. An exception will be raised if aggregation is
            required, but no aggregation function was specified.

        Returns
        -------
        AnovaResults instance
            The fitted result, which is also printed. (Earlier versions
            printed it but returned None despite documenting this return.)

        Notes
        -----
        If the data contain more than one observation per subject and cell
        of the design, these observations need to be aggregated into a
        single observation before the Anova is calculated, either manually
        or via ``aggregate_func``. If the input was not balanced before
        aggregation, the implied heteroscedasticity of the data is ignored.

        References
        ----------
        Rutherford, Andrew. Anova and ANCOVA: a GLM approach. John Wiley & Sons, 2011.

        """
        model = AnovaRM(self.__data, depvar, subject, within=within,
                        between=between, aggregate_func=aggregate_func)
        res = model.fit()
        print(res)
        return res
Exemplo n.º 19
0
    def fit(self,
            data,
            depvar,
            subject,
            within=None,
            between=None,
            aggregate_func=None):
        """Fit a repeated-measures ANOVA and keep the result on the instance.

        All arguments are forwarded by name to ``AnovaRM``; the fitted result
        is stored in ``self._results`` and nothing is returned.

        Parameters
        ----------
        data : DataFrame
        depvar : str
            The dependent variable in `data`
        subject : str
            Column identifying the subject
        within : list[str]
            The within-subject factors
        between : list[str]
            The between-subject factors (documented upstream as not yet
            implemented)
        aggregate_func : {None, 'mean', callable}
            Aggregation applied when a subject has more than one observation
            per cell of the model. `None` (the default) performs no
            aggregation; 'mean' is a shortcut to `numpy.mean`.

        Raises
        ------
        ValueError
            If the data need to be aggregated, but `aggregate_func` was not
            specified.

        """
        model_options = {
            'data': data,
            'depvar': depvar,
            'subject': subject,
            'within': within,
            'between': between,
            'aggregate_func': aggregate_func,
        }
        model = AnovaRM(**model_options)
        self._results = model.fit()
Exemplo n.º 20
0
def calculate_anova(df):
    """Collect one repeated-measures ANOVA p-value per element of ``df``.

    Each element is reduced by ``calculate_avg_across_models``, given a
    subject-number column (1..9), labelled with the columns
    ``bert``/``baseline``/``opennmt``/``subject`` and melted to long format
    before fitting. The ``'subject'`` row of the ANOVA table supplies the
    p-value.

    NOTE(review): ``'model'`` is passed as the subject column and
    ``'subject'`` as the within factor, which looks swapped -- confirm the
    intended design before relying on these p-values.
    """
    subject_count = 9
    subject_numbers = np.arange(1, subject_count + 1).reshape(
        subject_count, 1)
    model_columns = ['bert', 'baseline', 'opennmt', 'subject']

    pvals = []
    for vox in tqdm(df):
        averaged = calculate_avg_across_models(vox)
        with_subjects = np.append(averaged, subject_numbers, 1)
        wide = pd.DataFrame(with_subjects, columns=model_columns)
        long_form = wide.melt(id_vars=["subject"],
                              var_name="model",
                              value_name="corr")
        fitted = AnovaRM(long_form, "corr", "model",
                         within=["subject"]).fit()
        pvals.append(fitted.summary().tables[0]["Pr > F"]["subject"])
    return pvals
Exemplo n.º 21
0
def anova(data):
    """Run a one-factor repeated-measures ANOVA on wide-format data.

    Parameters
    ----------
    data : pandas.DataFrame
        Wide frame with a ``'sub_id'`` column; every other column is one
        condition holding performance values.

    Returns
    -------
    list
        ``[[F, p]]`` with the F value and p-value of the first ANOVA-table
        row, both rounded to two decimals. The pair is also printed.
    """
    data = pd.melt(data,
                   id_vars='sub_id',
                   var_name='cond',
                   value_name='performance')

    anovarm = AnovaRM(data, 'performance', 'sub_id', within=['cond'])
    res = anovarm.fit()

    # The table is indexed by term name, so take the first row by position
    # (integer keys on a label-indexed Series are deprecated).
    p = round(res.anova_table['Pr > F'].iloc[0], 2)
    F = round(res.anova_table['F Value'].iloc[0], 2)
    print(F, p)

    res2 = [[F, p]]

    return res2
Exemplo n.º 22
0
def calculate_anova(args, all_corrs):
    """Compute a per-voxel repeated-measures ANOVA p-value across layers.

    For every voxel position ``(i, j, k)`` the correlation values of 9
    subjects x 12 layers are gathered and a repeated-measures ANOVA is
    fitted with ``'corr'`` as dependent variable, ``'subject'`` as subject
    id and ``'layer'`` as within factor.

    NOTE(review): the previous call passed the constant ``'voxel'`` column
    as dependent variable and ``'corr'`` as subject id, and read the
    ``'subject:layer'`` row. The roles were reassigned to match the column
    names; confirm this is the intended design.

    Parameters
    ----------
    args : unused
    all_corrs : nested sequence
        Indexed as ``all_corrs[subject][layer][i][j][k]``;
        ``all_corrs[0][0].shape`` gives the voxel grid dimensions.

    Returns
    -------
    numpy.ndarray
        Array of shape ``dims[:3]`` holding the p-value of the layer effect.
    """
    dims = all_corrs[0][0].shape
    pvals = np.zeros((dims[0], dims[1], dims[2]))
    num_layers = 12
    num_subjs = 9
    print("LEN: " + str(len(all_corrs)))
    print("DIMS: " + str(all_corrs[0][0].shape))

    # The id columns are the same for every voxel, so build them once.
    subject_ids = np.repeat(list(range(1, num_subjs + 1)), num_layers)
    layer_ids = np.tile(list(range(1, num_layers + 1)), num_subjs)

    for i in tqdm(range(dims[0])):
        for j in range(dims[1]):
            for k in range(dims[2]):
                # Subject-major order, matching subject_ids / layer_ids.
                vals_across_subjs_and_layers = [
                    all_corrs[subj][layer][i][j][k]
                    for subj in range(num_subjs)
                    for layer in range(num_layers)
                ]

                df = pd.DataFrame({
                    'corr': vals_across_subjs_and_layers,
                    'subject': subject_ids,
                    'layer': layer_ids,
                })

                mod = AnovaRM(df, 'corr', 'subject', within=['layer']).fit()
                pvals[i][j][k] = mod.anova_table.loc['layer', 'Pr > F']
    return pvals
Exemplo n.º 23
0
def anova_group(means, recog):
    """Run a one-factor repeated-measures ANOVA and return its p-value.

    Parameters
    ----------
    means : pandas.DataFrame
        Wide frame with a ``'sub_id'`` column; every other column is one
        condition.
    recog : bool
        Only selects the label in the printed header (``'recog'`` when
        truthy, otherwise ``'recall'``).

    Returns
    -------
    float
        p-value of the first ANOVA-table row, rounded to four decimals. The
        full result is also printed.
    """
    t = 'recog' if recog else 'recall'

    # Reshape to long format: one row per (subject, condition).
    means = pd.melt(means,
                    id_vars='sub_id',
                    var_name='cond',
                    value_name='performance')

    anovarm = AnovaRM(means, 'performance', 'sub_id', within=['cond'])
    res = anovarm.fit()

    # The table is indexed by term name, so take the first row by position
    # (integer keys on a label-indexed Series are deprecated).
    p = round(res.anova_table['Pr > F'].iloc[0], 4)

    print(F'reaction times anova ({t})', res)

    return p
Exemplo n.º 24
0
# Label every row of each dataset with its condition. A scalar assignment
# fills the whole column, so no row-by-row loop is needed.
dataset_spec['condition'] = "spec"
dataset_sub['condition'] = "sub"
dataset_rule['condition'] = "rule"
dataset_gen['condition'] = "gen"

# concatenate all the dataframes
frames = [dataset_spec, dataset_sub, dataset_rule, dataset_gen]
result_df = pd.concat(frames)

# perform the ANOVA: 'OT' is the dependent variable, 'Subj_tr' the subject
# column and 'condition' the within-subject factor
aovrm = AnovaRM(result_df, 'OT', 'Subj_tr', within=['condition'])
res = aovrm.fit()

print(res)

################
##### use ######
### pingouin ###
################

import pingouin as pg
from pingouin import mixed_anova, read_dataset

# NOTE(review): rm_anova is not a built-in DataFrame method; presumably it
# is made available by importing pingouin above -- confirm.
df_ANOVA = result_df.rm_anova(dv='OT',
                              within='condition',
                              subject='Subj_tr',
                              detailed=True)
Exemplo n.º 25
0
print('')
print('Valores atípicos (outliers): tan sólo tenemos uno')
print('')
print('Igualdad de varianzas de las diferencias ente niveles de tratamiento:')
# Levene test for equal variances across the three sources. The variable
# name was mangled into the invalid target `h**o`; it is restored here as
# `homo` so the script parses.
homo = stats.levene(facebook['Visitantes'], pixel['Visitantes'],
                    wTienda['Visitantes'])
print(homo)
print(
    "El test de Levene para la prueba de igualdad de varianzas me da un p-valor = %f "
    % homo.pvalue)
print('Se cumple la hipótesis de homocedasticidad (hipótesis fuerte)')
print('')

# Repeated-measures ANOVA: 'Visitantes' is the dependent variable, 'Mes'
# the subject column and 'Procedencia' the within factor.
print('Realizo ahora la prueba ANOVA-MR')
aovrm = AnovaRM(longRM, 'Visitantes', 'Mes', ['Procedencia'])
ajuste = aovrm.fit()
print(ajuste.summary())
# NOTE(review): the p-value in the next message is hard-coded text, not
# read from `ajuste`.
print(
    'Obtengo un p-valor = 0.8436. Por tanto no rechazo la hipótesis nula de igualdad de medias'
)
print(
    'CONCLUSIÓN: en este subconjunto no importa la procedencia de la variable Visitantes, pues me proporcionan la misma información'
)
print('')

print('2º) Prueba estadística para muestras pareadas')
print('Breve estudio descriptivo')
print('Gráfico de cajas y bigotes:')
bp2 = plt.boxplot([
    gaCatalogo['Usuarios'],
    wCatalogo['Visitantes'][(len(wCatalogo['Visitantes']) -
values = [998, 511]

# Simulated long-format data: N participants measured at every level in P.
# Participant ids 1..N are repeated once per level.
sub_id = list(range(1, N + 1)) * len(P)
# Each entry of `values` is the mean for a block of N observations.
mus = np.repeat(values, N).tolist()
rt = np.random.normal(mus, scale=112.0, size=N * len(P)).tolist()
# Level label for every observation, in the same block order as `mus`.
iv = np.concatenate([np.full(N, level) for level in P]).tolist()

df = pd.DataFrame({"id": sub_id, "rt": rt, "iv": iv})

# %% [markdown] {"slideshow": {"slide_type": "subslide"}}
# Do the repeated measures ANOVA.

# %%
aovrm = AnovaRM(df, depvar="rt", subject="id", within=["iv"])
fit = aovrm.fit()
fit.summary()

# %% [markdown] {"slideshow": {"slide_type": "slide"}}
# # dfply

# %% [markdown]
# For those of you who are familiar with R and the tidyverse, the [dfply package](https://github.com/kieferk/dfply) allows you to have dplyr-like piping in Python. The pipe operator for this package is `>>`, while the result of each computation step is given by `X`. `>>=` is used for in-place assignment. All the documentation is available at the link; I'm just going to go over some useful basics here.

# %%
from dfply import *

# Pipe the `diamonds` frame into dfply's head() with the `>>` operator.
diamonds >> head()

# %% [markdown] {"slideshow": {"slide_type": "subslide"}}
# ## Selection
def _rm_anova_p_value(values, decimals=3):
    """Return the rounded p-value of a one-factor repeated-measures ANOVA.

    ``values`` is a 2-D array whose rows are taken as subjects and whose
    columns are taken as conditions; the id columns are derived from its
    shape so they always match the flattened data.
    """
    values = np.asarray(values)
    n_subjects, n_conditions = values.shape
    frame = pd.DataFrame({
        # Condition-major order: all subjects of condition 0, then 1, ...
        'Data': np.concatenate(values.T, 0),
        'Sub_id': np.tile(np.arange(n_subjects), n_conditions),
        'cond': np.repeat(np.arange(n_conditions, dtype=float), n_subjects),
    })
    res = AnovaRM(frame, depvar='Data', subject='Sub_id',
                  within=['cond']).fit()
    return np.around(res.anova_table['Pr > F'].iloc[0], decimals)


def _plot_coefficient_panel(position, n_panels, values, p_value, title,
                            color, with_ylabel):
    """Draw mean +/- SEM across subjects for one coefficient in a subplot."""
    n_subjects, n_tasks = values.shape
    means = np.mean(values, 0)
    sems = np.std(values, 0) / np.sqrt(n_subjects)

    plt.subplot(1, n_panels, position)
    plt.errorbar(np.arange(len(means)), means, yerr=sems, fmt='o',
                 color=color)
    plt.annotate(str(p_value), xy=(n_tasks, np.max(means) + 0.01))
    plt.xlim(-1, n_tasks)
    plt.title(title)
    plt.xticks(np.arange(n_tasks), np.arange(n_tasks) + 1)
    plt.xlabel('Task')
    if with_ylabel:
        plt.ylabel('Coefficient')


def plotting_functions(m484, m479, m483, m478, m486, m480, m481,exp, n = 5):
    """Plot regression coefficients per task with repeated-measures p-values.

    Coefficients come from ``runs_length(exp, subject_IDs='all', n=n)`` and
    are split along the last axis into reward, choice and choice-x-reward
    predictors (``n`` each; the final entry is dropped). Five panels show
    mean +/- SEM over the first axis for each task on the second axis, each
    annotated with the p-value of a one-factor repeated-measures ANOVA.

    Assumes the first axis indexes subjects and the second indexes tasks
    (the original divided the SEM by sqrt(9) and labelled 10 tasks).

    Changes from the earlier version: p-values are rounded to three
    decimals instead of to 0/1, and the subject/task id vectors and the SEM
    denominator are derived from the array shape instead of hard-coded 9/10.

    Parameters
    ----------
    m484, m479, m483, m478, m486, m480, m481 : unused
        Kept for interface compatibility.
    exp : passed to ``runs_length``.
    n : int
        Number of trials back per predictor family.

    Returns
    -------
    None
        The figure is drawn on the current matplotlib state.
    """
    coef_subj = np.asarray(runs_length(exp, subject_IDs ='all', n = n))
    rewards = coef_subj[:, :, :n]
    choices = coef_subj[:, :, n:n * 2]
    choices_X_reward = coef_subj[:, :, n * 2:-1]

    isl = wes.Royal2_5.mpl_colors

    # (values, title, colour, draw y-label) for each panel, left to right.
    panels = [
        (np.mean(rewards, 2), ' N Rewards Back', isl[0], False),
        (choices[:, :, 0], ' 1 Choice Back', isl[3], True),
        (np.mean(choices[:, :, 1:], 2), ' 2+ Choices Back', isl[3], True),
        (choices_X_reward[:, :, 0], ' 1 Choice x Reward Back', isl[4], True),
        (np.mean(choices_X_reward[:, :, 1:], 2),
         ' 2 Choices x Rewards Back', isl[4], True),
    ]

    # Fit every ANOVA before any figure is created, as before.
    p_values = [_rm_anova_p_value(values) for values, _, _, _ in panels]

    plt.figure(figsize = (10,4))
    for position, ((values, title, color, with_ylabel), p_value) in enumerate(
            zip(panels, p_values), start=1):
        _plot_coefficient_panel(position, len(panels), values, p_value,
                                title, color, with_ylabel)

    sns.despine()
    plt.tight_layout()
Exemplo n.º 28
0
    ANOVA_list.append([ID, 'nogo', 'congruent', individ_acc[1]])
    ANOVA_list.append([ID, 'go', 'incongruent', individ_acc[2]])
    ANOVA_list.append([ID, 'nogo', 'incongruent', individ_acc[3]])

    for i in range(4):
        all_accuracies[i].append(individ_acc[i])

# Repeated-measures ANOVA on per-subject accuracy.
# ANOVA_list holds one [pid, response, congruency, accuracy] row per cell.
anova_columns = ['pid', 'response', 'congruency', 'SbjACC']
data = pd.DataFrame(ANOVA_list, columns=anova_columns)

# Mean accuracy for every response x congruency combination.
within_factors = ['response', 'congruency']
gpResult = data.groupby(within_factors)['SbjACC'].mean().reset_index()
print(gpResult)

# Fit the two-factor within-subject model (subject column: 'pid') and print
# the fitted results object.
curr_ANOVA = AnovaRM(data, 'SbjACC', 'pid', within=within_factors).fit()
print(curr_ANOVA)

# Overall analysis: mean of each per-condition accuracy list.
all_accuracies_average = [
    sum(condition_scores) / len(condition_scores)
    for condition_scores in all_accuracies
]
print(all_accuracies_average)

# Swap the two middle entries (new order: 0, 2, 1, 3).
# NOTE(review): going by the rows appended to ANOVA_list, indices 1/2/3 are
# nogo-congruent / go-incongruent / nogo-incongruent, so this presumably
# regroups the averages by response rather than by congruency - confirm.
all_accuracies_average = [all_accuracies_average[k] for k in (0, 2, 1, 3)]
# Finish styling the grid `g`: axis labels, a fixed y-range, per-panel titles
# built from the row/column names, and no left spine.
(
    g.set_axis_labels('Awareness', 'Probability')
    .set(ylim=(0, 0.85))
    .set_titles("{row_name} | {col_name}")
    .despine(left=True)
)

# Label the first panel of each grid row with its attention condition.
# Rows are matched to groups purely by position in the groupby iteration.
# (An earlier draft also fitted an OLS 'prob ~ C(correctness)*C(awareness)*
# C(confidence)' model per group and annotated its F/p values on the panel.)
for row_idx, (attention_name, _) in enumerate(df_plot.groupby('attention')):
    g.axes[row_idx][0].set(ylabel=f'Probability | {attention_name}')

# Write the finished figure into figure_dir.
figure_path = os.path.join(figure_dir, 'att.png')
g.savefig(figure_path, dpi=400, bbox_inches='tight')

# Build one combined label per row from the three within-subject factors,
# formatted as "<correctness>, <awareness>, <confidence>".
df_plot['level'] = (df_plot['correctness'] + ', ' + df_plot['awareness'] +
                    ', ' + df_plot['confidence'])

# For every attention condition: export a wide table (one column per level)
# and run a three-factor repeated-measures ANOVA.
# Group by the plain column name rather than a one-element list: with a list
# key, pandas >= 2.0 yields 1-tuples when iterating, which would put
# "('...',)" into the file names below and make the `temp` keys tuples.
for target, df_sub in df_plot.groupby('attention'):
    # Sort first so the per-level value arrays share the same row order.
    df_sub = df_sub.sort_values(['sub', 'window', 'level'])

    # Map each level label to its probability values.
    temp = {
        level: df_level['prob'].values
        for level, df_level in df_sub.groupby('level')
    }
    for_j = pd.DataFrame(temp)
    for_j.to_csv(os.path.join(saving_dir,
                              f'{target} for jsp.csv'))  #,na_rep='NAN')

    # Subject column is 'sub'; the dependent variable is 'prob'.
    aovrm = AnovaRM(df_sub,
                    'prob',
                    'sub',
                    within=['awareness', 'confidence', 'correctness'])
    # The first table of the fitted summary is written out as the report.
    res = aovrm.fit().summary().tables[0]
    # NOTE(review): 'ANVOA' looks like a typo for 'ANOVA'; the spelling is kept
    # so that existing output file names do not change.
    res.to_csv(os.path.join(saving_dir, f'ANVOA report {target}.csv'))
    spark.sparkContext.setLogLevel("ERROR")

    print "\nspark session created sucessfully:: \n"

    dataset = spark.read.csv("/home/fidel/mltest/bank.csv",
                             header=True,
                             inferSchema=True)

    dataset.printSchema()
    df = pd.read_csv("/home/fidel/mltest/bank.csv", delimiter=";")
    print df.describe()

    ######creating the box plot
    #

    boxplot = df.boxplot('age', by='marital', figsize=(12, 8))

    df_anova = dataset.toPandas()

    mod = ols("age ~ housing", data=df_anova).fit()
    aov_table = sm.stats.anova_lm(mod, typ=2)
    print aov_table

    # using 1st test

    anovarm = AnovaRM(df_anova, "age", "default", within=["marital"])
    fit = anovarm.fit()
    fit.summary()

    # 2nd method
Exemplo n.º 31
0
    ANOVA_list.append([ID, 'nogo', 'congruent', individ_acc[1]])
    ANOVA_list.append([ID, 'go', 'incongruent', individ_acc[2]])
    ANOVA_list.append([ID, 'nogo', 'incongruent', individ_acc[3]])

    for i in range(4):
        all_accuracies[i].append(individ_acc[i])

# ANOVA: assemble the long-format table, one row per entry of ANOVA_list.
data = pd.DataFrame(
    ANOVA_list,
    columns=['pid', 'response', 'congruency', 'SbjACC'],
)

# Cell means of accuracy across the response x congruency design.
cell_means = data.groupby(['response', 'congruency'])['SbjACC'].mean()
gpResult = cell_means.reset_index()
print(gpResult)

# Repeated-measures ANOVA with 'pid' as the subject identifier; the name is
# bound directly to the fitted results, which are then printed.
prev_ANOVA = AnovaRM(
    data, 'SbjACC', 'pid', within=['response', 'congruency']).fit()
print(prev_ANOVA)

# Average each per-condition accuracy list.
all_accuracies_average = [
    sum(scores) / len(scores) for scores in all_accuracies
]

print(all_accuracies_average)

# Plot the relationship: condition labels and one position per label.
labels = ('congruent go', 'congruent nogo', 'incongruent go',
          'incongruent nogo')
n_labels = len(labels)
y_pos = np.arange(n_labels)