Python hypothesize示例，tea.hypothesize Python示例

示例#1

0

显示文件

def test_paired_t_test():
    """Run the 'Group:Real Spider > Picture' hypothesis on spiderLong_within.csv.

    Notes left in the original source name the expected outcome as a
    paired/dependent t-test (from Field et al.). Nothing is asserted here;
    the function only drives tea and prints a separator line.
    """
    group = {
        'name': 'Group',
        'data type': 'nominal',
        'categories': ['Picture', 'Real Spider'],
    }
    anxiety = {'name': 'Anxiety', 'data type': 'ratio'}

    # 'Group' is listed both as the independent variable and as the
    # within-subjects factor.
    design = {
        'study type': 'experiment',
        'independent variables': 'Group',
        'dependent variables': 'Anxiety',
        'within subjects': 'Group',
    }
    alpha = {'Type I (False Positive) Error Rate': 0.05}

    csv_path = get_data_path('spiderLong_within.csv')
    tea.data(csv_path, key="id")
    tea.define_variables([group, anxiety])
    # The design is declared separately from the data, so several study
    # designs can share one dataset (handy for reusing analyses, although it
    # could encourage fishing for results).
    tea.define_study_design(design)
    tea.assume(alpha)

    tea.hypothesize(['Group', 'Anxiety'], ['Group:Real Spider > Picture'])

    print('++++++++++++')

示例#2

0

显示文件

def test_wilcoxon_signed_rank():
    """Run the 'day:sundayBDI != wedsBDI' hypothesis on alcohol.csv.

    Notes left in the original source name the expected outcome as a
    Wilcoxon signed rank test (from Field et al.). Nothing is asserted; a
    separator line is printed at the end.
    """
    drug = {'name': 'drug', 'data type': 'nominal', 'categories': ['Alcohol']}
    day = {
        'name': 'day',
        'data type': 'nominal',
        'categories': ['sundayBDI', 'wedsBDI'],
    }
    value = {'name': 'value', 'data type': 'ratio'}

    design = {
        'study type': 'experiment',
        'independent variables': 'day',
        'dependent variables': 'value',
        'within subjects': 'day',
    }
    alpha = {'Type I (False Positive) Error Rate': 0.05}

    tea.data(get_data_path('alcohol.csv'))
    tea.define_variables([drug, day, value])
    # Declaring the design apart from the data lets one dataset be analysed
    # under several study designs (useful for reuse, though it could
    # encourage fishing for results).
    tea.define_study_design(design)
    tea.assume(alpha)

    tea.hypothesize(['day', 'value'], ['day:sundayBDI != wedsBDI'])

    print('++++++++++++')

示例#3

0

显示文件

def test_kruskall_wallis():
    """Relate 'Soya' to 'Sperm' in soya.csv without a directional hypothesis.

    The function prints the expected outcome (Kruskall Wallis, from Field
    et al.) after calling tea; it does not assert on tea's result.
    """
    soya_levels = ['No Soya', '1 Soya Meal', '4 Soya Meals', '7 Soya Meals']
    var_specs = [
        {'name': 'Sperm', 'data type': 'interval'},
        {'name': 'Soya', 'data type': 'ordinal', 'categories': soya_levels},
    ]
    design = {
        'study type': 'experiment',
        'independent variables': 'Soya',
        'dependent variables': 'Sperm',
        'between subjects': 'Soya',
    }
    alpha = {'Type I (False Positive) Error Rate': 0.05}

    tea.data(get_data_path('soya.csv'))
    tea.define_variables(var_specs)
    # The design is kept separate from the data so that several designs can
    # be tried on one dataset (practical for reuse, but it could encourage
    # fishing for results).
    tea.define_study_design(design)
    tea.assume(alpha)

    tea.hypothesize(['Soya', 'Sperm'])

    print("\nFrom Field et al.")
    print("Expected outcome: Kruskall Wallis")

示例#4

0

显示文件

def test_f_test():
    """Relate 'trt' to 'response' in cholesterol.csv with no directional claim.

    Notes left in the original source name the expected outcome as a oneway
    ANOVA (F) test (from Field et al.). Nothing is asserted; a separator
    line is printed at the end.
    """
    treatment_levels = ['1time', '2times', '4times', 'drugD', 'drugE']
    var_specs = [
        {'name': 'trt', 'data type': 'nominal', 'categories': treatment_levels},
        {'name': 'response', 'data type': 'ratio'},
    ]
    design = {
        'study type': 'experiment',
        'independent variables': 'trt',
        'dependent variables': 'response',
        'between subjects': 'trt',
    }
    alpha = {'Type I (False Positive) Error Rate': 0.05}

    tea.data(get_data_path('cholesterol.csv'))
    tea.define_variables(var_specs)
    # One dataset may be paired with several study designs because the
    # design is declared on its own (good for reuse; could also encourage
    # fishing for results).
    tea.define_study_design(design)
    tea.assume(alpha)

    tea.hypothesize(['trt', 'response'])
    print('++++++++++++')

示例#5

0

显示文件

def test_wilcoxon_signed_rank_1():
    """Run the 'Source:Memory != Median' hypothesis on real_stats_1.csv.

    'Source' is declared as the within-subjects factor. The function is
    named for the Wilcoxon signed rank test but asserts nothing about which
    test tea selects.
    """
    # The dataset is registered first, before any variables are described.
    tea.data('./tests/data/real_stats_1.csv')

    subject = {'name': 'Subject', 'data type': 'ratio'}
    source = {
        'name': 'Source',
        'data type': 'nominal',
        'categories': ['Memory', 'Median'],
    }
    score = {'name': 'Score', 'data type': 'ratio'}
    tea.define_variables([subject, source, score])

    # Declaring the design separately allows several study designs per
    # dataset (practical for reuse, though it could encourage fishing for
    # results).
    tea.define_study_design({
        'study type': 'experiment',
        'independent variables': 'Source',
        'dependent variables': 'Score',
        'within subjects': 'Source',
    })
    tea.assume({'Type I (False Positive) Error Rate': 0.05})

    tea.hypothesize(['Source', 'Score'], ['Source:Memory != Median'])

示例#6

0

显示文件

def test_wilcoxon_signed_rank_2():
    """Run the 'Person:Wife != Husband' hypothesis on real_stats_2.csv.

    The data is loaded with 'Couple' as its key and 'Person' is declared as
    the within-subjects factor of an observational study. The function is
    named for the Wilcoxon signed rank test but asserts nothing.
    """
    tea.data('./tests/data/real_stats_2.csv', key='Couple')

    couple = {'name': 'Couple', 'data type': 'ratio'}
    person = {
        'name': 'Person',
        'data type': 'nominal',
        'categories': ['Wife', 'Husband'],
    }
    score = {'name': 'Score', 'data type': 'ratio'}
    tea.define_variables([couple, person, score])

    # The design is separate from the data, so one dataset can carry several
    # study designs (useful for reuse; could also encourage fishing for
    # results).
    tea.define_study_design({
        'study type': 'observational study',
        'contributor variables': 'Person',
        'outcome variables': 'Score',
        'within subjects': 'Person',
    })
    tea.assume({'Type I (False Positive) Error Rate': 0.05})

    tea.hypothesize(['Person', 'Score'], ['Person:Wife != Husband'])

示例#7

0

显示文件

def test_indep_t_test():
    """Run the 'So:1 > 0' hypothesis on 'So' and 'Prob' from UScrime.csv.

    Notes left in the original source name the expected outcome as
    Student's t-test (from Kabacoff). Nothing is asserted; the function
    drives tea and prints a separator line.
    """
    data_path = get_data_path('UScrime.csv')

    # Declare and annotate the variables of interest
    variables = [{
        'name': 'So',
        'data type': 'nominal',
        'categories': ['0', '1']
    }, {
        'name': 'Prob',
        'data type': 'ratio',
        'range': [0, 1]
    }]
    experimental_design = {
        'study type': 'observational study',
        'contributor variables': 'So',
        'outcome variables': 'Prob',
    }
    # Besides the significance level, the user asserts that 'Prob' is
    # normally distributed within the groups of 'So'.
    assumptions = {
        'Type I (False Positive) Error Rate': 0.05,
        'groups normally distributed': [['Prob', 'So']]
    }

    # NOTE(review): a `transformations = {'log': ['Prob']}` dict used to be
    # built here but was never handed to tea, so it was removed as dead code.
    # If a log transform of 'Prob' was intended, it still needs wiring up.

    tea.data(data_path)
    tea.define_variables(variables)
    # The design is declared separately from the data, so several study
    # designs can share one dataset (handy for reusing analyses, although it
    # could encourage fishing for results).
    tea.define_study_design(experimental_design)
    tea.assume(assumptions)

    tea.hypothesize(['So', 'Prob'], ['So:1 > 0'])  # Southern is greater
    print('++++++++++++')

示例#8

0

显示文件

def test_chi_square_with_dataframe():
    """Relate 'Training' to 'Dance' using a DataFrame loaded from catsData.csv.

    Unlike the sibling examples, tea.data receives a pandas DataFrame rather
    than a file path. A note in the original source labels the analysis as
    chi square. Nothing is asserted; a separator line is printed.
    """
    cats_frame = pd.read_csv(get_data_path('catsData.csv'))

    training = {
        'name': 'Training',
        'data type': 'nominal',
        'categories': ['Food as Reward', 'Affection as Reward'],
    }
    dance = {
        'name': 'Dance',
        'data type': 'nominal',
        'categories': ['Yes', 'No'],
    }
    design = {
        'study type': 'observational study',
        'contributor variables': 'Training',
        'outcome variables': 'Dance',
    }
    alpha = {'Type I (False Positive) Error Rate': 0.05}

    # Hand tea the in-memory frame instead of the CSV path.
    tea.data(cats_frame)
    tea.define_variables([training, dance])
    # Declaring the design apart from the data lets one dataset be analysed
    # under several designs (useful for reuse, though it could encourage
    # fishing for results).
    tea.define_study_design(design)
    tea.assume(alpha)

    tea.hypothesize(['Training', 'Dance'])
    print('++++++++++++')

示例#9

0

显示文件

def test_pearson_corr():
    """Run the 'Illiteracy ~ Life Exp' hypothesis on statex77.csv.

    Assumptions are passed to tea with the positional mode 'strict'. Notes
    left in the original source name the expected outcome as Pearson (from
    Kabacoff). Nothing is asserted; a separator line is printed.
    """
    data_path = get_data_path('statex77.csv')

    # Declare and annotate the variables of interest
    # NOTE(review): 'Illiteracy' is an interval variable yet carries a
    # 'categories' key; other interval/ratio variables use 'range' for
    # [low, high] bounds. Left unchanged here -- confirm which is intended.
    variables = [{
        'name': 'Illiteracy',
        'data type': 'interval',
        'categories': [0, 100]
    }, {
        'name': 'Life Exp',
        'data type': 'ratio',
    }]
    # Both variables are listed as contributors; no outcome is named.
    experimental_design = {
        'study type': 'observational study',
        'contributor variables': ['Illiteracy', 'Life Exp'],
        'outcome variables': ''
    }
    assumptions = {
        'Type I (False Positive) Error Rate': 0.05,
        'normal distribution': ['Illiteracy']
    }

    tea.data(data_path)
    tea.define_variables(variables)
    # The design is declared separately from the data, so several study
    # designs can share one dataset (handy for reusing analyses, although it
    # could encourage fishing for results).
    tea.define_study_design(experimental_design)
    tea.assume(assumptions, 'strict')

    # The return value was previously bound to an unused local; it is
    # discarded explicitly now.
    tea.hypothesize(['Illiteracy', 'Life Exp'], ['Illiteracy ~ Life Exp'])
    print('++++++++++++')

示例#10

0

显示文件

def test_pearson_corr_2():
    """Run three pairwise hypotheses over exam.csv.

    The pairs are Anxiety/Exam, Revise/Exam and Anxiety/Revise, each without
    a directional prediction. The function then prints the expected outcome
    (Pearson, from Field et al.); it does not assert on tea's results.
    """
    data_path = get_data_path('exam.csv')

    # Declare and annotate the variables of interest
    variables = [
        {
            'name': 'Exam',
            'data type': 'ratio',
            'range': [0, 100]
        },
        {
            'name': 'Anxiety',
            'data type': 'interval',
            'range': [0, 100]
        },
        {
            'name': 'Gender',
            'data type': 'nominal',
            'categories': ['Male', 'Female']
        },
        {
            'name': 'Revise',
            'data type': 'ratio'
        }
    ]
    experimental_design = {
        'study type': 'observational study',
        'contributor variables': ['Anxiety', 'Gender', 'Revise'],
        'outcome variables': 'Exam'
    }
    assumptions = {
        'Type I (False Positive) Error Rate': 0.05,
    }

    tea.data(data_path)
    tea.define_variables(variables)
    tea.define_study_design(experimental_design)
    tea.assume(assumptions)

    # Each call used to overwrite the same unused `results` local; the return
    # values are simply discarded now.
    for pair in (['Anxiety', 'Exam'], ['Revise', 'Exam'], ['Anxiety', 'Revise']):
        tea.hypothesize(pair)
    print("\nfrom Field et al.")
    print("Expected outcome: Pearson")

示例#11

0

显示文件

def test_two_way_anova():
    """Relate 'uptake' to 'conc' and 'Type' in co2.csv under relaxed assumptions.

    Notes left in the original source say this is supposed to be a 2 way
    ANOVA, that it fails because not all groups are normal, and ask whether
    there is a 'Type' main effect. Nothing is asserted; a separator line is
    printed.
    """
    uptake = {'name': 'uptake', 'data type': 'interval'}
    plant_type = {
        'name': 'Type',
        'data type': 'nominal',
        'categories': ['Quebec', 'Mississippi'],
    }
    conc = {
        'name': 'conc',
        'data type': 'ordinal',
        'categories': [95, 175, 250, 350, 500, 675, 1000],
    }

    # 'conc' varies within subjects while 'Type' varies between subjects.
    design = {
        'study type': 'experiment',
        'independent variables': ['Type', 'conc'],
        'dependent variables': 'uptake',
        'within subjects': 'conc',
        'between subjects': 'Type',
    }
    # NOTE(review): the second normality pair is ['Type', 'conc'], whereas the
    # equal-variance list pairs each factor with 'uptake' -- confirm intended.
    assumed = {
        'Type I (False Positive) Error Rate': 0.05,
        'groups normally distributed': [['Type', 'uptake'], ['Type', 'conc']],
        'equal variance': [['Type', 'uptake'], ['conc', 'uptake']],
    }

    tea.data(get_data_path('co2.csv'))
    tea.define_variables([uptake, plant_type, conc])
    # The design is declared on its own so that several designs can be used
    # with one dataset (practical for reuse; could also encourage fishing for
    # results).
    tea.define_study_design(design)
    tea.assume(assumed, mode='relaxed')

    # Source note: fails because not all groups are normal.
    tea.hypothesize(['uptake', 'conc', 'Type'])
    print('++++++++++++')

示例#12

0

显示文件

def test_rm_one_way_anova():
    """Relate 'uptake' to 'conc' in co2.csv, keyed by 'Plant'.

    The function prints the expected outcome (Repeated Measures One Way
    ANOVA, from Field et al.) after calling tea; it asserts nothing.
    """
    uptake = {'name': 'uptake', 'data type': 'interval'}
    plant_type = {
        'name': 'Type',
        'data type': 'nominal',
        'categories': ['Quebec', 'Mississippi'],
    }
    conc = {
        'name': 'conc',
        'data type': 'ordinal',
        'categories': [95, 175, 250, 350, 500, 675, 1000],
    }

    # 'conc' is the within-subjects factor; 'Type' is between subjects.
    design = {
        'study type': 'experiment',
        'independent variables': ['Type', 'conc'],
        'dependent variables': 'uptake',
        'within subjects': 'conc',
        'between subjects': 'Type',
    }
    alpha = {'Type I (False Positive) Error Rate': 0.05}

    tea.data(get_data_path('co2.csv'), key="Plant")
    tea.define_variables([uptake, plant_type, conc])
    # Keeping the design separate from the data allows several study designs
    # per dataset (useful for reuse, though it could encourage fishing for
    # results).
    tea.define_study_design(design)
    tea.assume(alpha)

    tea.hypothesize(['uptake', 'conc'])

    print("\nFrom Field et al.")
    print("Expected outcome: Repeated Measures One Way ANOVA")

示例#13

0

显示文件

def test_factorial_anova():
    """Relate 'attractiveness' to 'gender' and 'alcohol' in gogglesData.csv.

    Both factors are declared between subjects. The function prints the
    expected outcome (Factorial ANOVA, from Field et al.); a note in the
    original source also asks about an alcohol main effect. Nothing is
    asserted.
    """
    gender = {
        'name': 'gender',
        'data type': 'nominal',
        'categories': ['Female', 'Male'],
    }
    alcohol = {
        'name': 'alcohol',
        'data type': 'nominal',
        'categories': ['None', '2 Pints', '4 Pints'],
    }
    attractiveness = {'name': 'attractiveness', 'data type': 'interval'}

    design = {
        'study type': 'experiment',
        'independent variables': ['gender', 'alcohol'],
        'dependent variables': 'attractiveness',
        'between subjects': ['gender', 'alcohol'],
    }
    alpha = {'Type I (False Positive) Error Rate': 0.05}

    tea.data(get_data_path('gogglesData.csv'))
    tea.define_variables([gender, alcohol, attractiveness])
    # The design is declared apart from the data, so one dataset can be
    # paired with several designs (handy for reuse; could also encourage
    # fishing for results).
    tea.define_study_design(design)
    tea.assume(alpha)

    tea.hypothesize(['attractiveness', 'gender', 'alcohol'])
    print("\nFrom Field et al.")
    print("Expected outcome: Factorial ANOVA")

示例#14

0

显示文件

def test_pointbiserial_corr():
    """Run the 'gender:1 > 0' hypothesis on 'time' and 'gender' from pbcorr.csv.

    Notes left in the original source name the expected outcome as
    Pointbiserial (from Field et al.) and express uncertainty about whether
    the hypothesis call works. Nothing is asserted; a separator line is
    printed.
    """
    time_var = {'name': 'time', 'data type': 'ratio'}
    gender = {
        'name': 'gender',
        'data type': 'nominal',
        'categories': [0, 1],  # ordered from lowest to highest
    }
    recode = {'name': 'recode', 'data type': 'nominal', 'categories': [0, 1]}

    design = {
        'study type': 'observational study',
        'contributor variables': ['gender', 'recode'],
        'outcome variables': 'time',
    }
    alpha = {'Type I (False Positive) Error Rate': 0.05}

    tea.data(get_data_path('pbcorr.csv'))
    tea.define_variables([time_var, gender, recode])
    tea.define_study_design(design)
    tea.assume(alpha)

    # Source note: "I think this works!?"
    tea.hypothesize(['time', 'gender'], ['gender:1 > 0'])
    print('++++++++++++')

示例#15

0

显示文件

def test_mann_whitney_0():
    """Run the 'Treatment:Control != Drug' hypothesis on real_stats_3.csv.

    No within- or between-subjects key is given in the design. The function
    is named for the Mann-Whitney test but asserts nothing about which test
    tea selects.
    """
    tea.data('./tests/data/real_stats_3.csv')

    treatment = {
        'name': 'Treatment',
        'data type': 'nominal',
        'categories': ['Control', 'Drug'],
    }
    score = {'name': 'Score', 'data type': 'ratio'}
    tea.define_variables([treatment, score])

    # Declaring the design separately allows several study designs per
    # dataset (practical for reuse, though it could encourage fishing for
    # results).
    tea.define_study_design({
        'study type': 'experiment',
        'independent variables': 'Treatment',
        'dependent variables': 'Score',
    })
    tea.assume({'Type I (False Positive) Error Rate': 0.05})
    tea.hypothesize(['Treatment', 'Score'], ['Treatment:Control != Drug'])

示例#16

0

显示文件

def test_kendall_tau_corr():
    """Run two 'Position' hypotheses on 'Position' and 'Creativity' from liar.csv.

    The hypotheses are 'Position:1 > 6' and 'Position:1 > 2'. Notes left in
    the original source name the expected outcome as Kendall Tau (from Field
    et al.) and express uncertainty about whether the call works. Nothing is
    asserted; a separator line is printed.
    """
    data_path = get_data_path('liar.csv')

    # Declare and annotate the variables of interest
    variables = [
        {
            'name': 'Creativity',
            'data type': 'interval'
        },
        {
            'name': 'Position',
            'data type': 'ordinal',
            'categories': [6, 5, 4, 3, 2, 1]  # ordered from lowest to highest
        },
        {
            'name': 'Novice',
            'data type': 'nominal',
            'categories': [0, 1]
        }
    ]
    experimental_design = {
        'study type': 'observational study',
        'contributor variables': ['Novice', 'Creativity'],
        'outcome variables': 'Position'
    }
    assumptions = {
        'Type I (False Positive) Error Rate': 0.05,
    }

    tea.data(data_path)
    tea.define_variables(variables)
    tea.define_study_design(experimental_design)
    tea.assume(assumptions)

    # The return value was previously stored in an unused local; it is
    # discarded now. Source note: "I think this works!?"
    tea.hypothesize(
        ['Position', 'Creativity'],
        ['Position:1 > 6', 'Position:1 > 2'])
    print('++++++++++++')

示例#17

0

显示文件

        'categories': ['SE', 'SD']
    },
    {
        'name': 'text_condition',
        'data type': 'nominal',
        'categories': ['show_both_stats', 'show_viz_stats_only']
    },
    {
        'name': 'wtp_final',
        'data type': 'interval'
    },
    {
        'name': 'superiority_special',
        'data type': 'ratio',
        'range': [0, 1]
    }
]

# `variables` is the list whose tail appears just above this excerpt.
tea.define_variables(variables)

# The study kind is keyed as 'study type' (with a space), not 'study_type';
# the underscore spelling would not be recognised as the study-type entry.
study_design = {
    'study type': 'experiment',
    'independent variables': ['condition', 'text_condition'],
    'dependent variables': 'wtp_final'
}

tea.define_study_design(study_design)

# Hypothesis string passed to tea: condition 'SE' compared against 'SD'.
tea.hypothesize(['condition', 'wtp_final'], ['condition: SE > SD'])

示例#18

0

显示文件

# Observational design: 'Sport' and 'Sex' are declared as contributor
# variables and 'Weight' as the outcome variable.
study_design = {
    'study type': 'observational study',
    'contributor variables': ['Sport', 'Sex'],
    'outcome variables': 'Weight',
}

# Only the significance level is asserted; the group-normality assumption
# below is left commented out.
assumptions = {
    # 'groups normally distributed': [['Sport', 'Weight']],
    'Type I (False Positive) Error Rate': 0.05,
}

# NOTE(review): `data_path` and `variables` are not defined in this excerpt;
# presumably they are set earlier in the script -- confirm.
tea.data(data_path, key='ID')
tea.define_variables(variables)
tea.define_study_design(study_design)
tea.assume(assumptions)
# Two separate hypotheses are run against the same outcome, 'Weight'.
tea.hypothesize(['Sport', 'Weight'], ['Sport:Wrestling > Swimming'])
tea.hypothesize(['Sex', 'Weight'], ['Sex:F < M'])
'''
Results:
--------------
Test: mannwhitney_u
***Test assumptions:
Exactly one explanatory variable: Sex
Exactly one explained variable: Weight
Independent (not paired) observations: Sex
Variable is categorical: Sport
Variable has two categories: Sport
Continuous OR ORDINAL (not nominal) data: Weight

***Test results:
name = Mann Whitney U Test

示例#19

0

显示文件

文件： drug_small_tea.py 项目： pkuleon/tea-lang

# Observational design with two contributor variables and two outcome
# variables.
study_design = {
    'study type': 'observational study',
    'contributor variables': ['drug', 'sundayBDI'],
    'outcome variables': ['BDIchange', 'wedsBDI']
}

# Significance level of 0.01 (the sibling examples mostly use 0.05).
assumptions = {
    'Type I (False Positive) Error Rate': 0.01
}

# NOTE(review): `df` and `variables` are not defined in this excerpt;
# presumably they are built earlier in the script -- confirm.
tea.data(df)
tea.define_variables(variables)
tea.define_study_design(study_design)
tea.assume(assumptions)
# First hypothesis compares two 'drug' categories on 'wedsBDI'; the second
# uses the '~' form between 'sundayBDI' and 'BDIchange'.
tea.hypothesize(['drug', 'wedsBDI'], ['drug:Ecstasy > Alcohol'])
tea.hypothesize(['sundayBDI', 'BDIchange'], ['sundayBDI ~ BDIchange'])

'''
Results:
--------------
Test: pointbiserial_corr_a
***Test assumptions:
Exactly two variables involved in analysis: drug, wedsBDI
Continuous (not categorical) data: wedsBDI
Normal distribution: wedsBDI: NormalTest(W=0.7817826867103577, p_value=0.04004703089594841)
Variable is categorical: drug
Variable has two categories: drug
Equal variance: drug, wedsBDI

***Test results:

示例#20

0

显示文件

文件： co2_tea.py 项目： pkuleon/tea-lang

# Observational design: 'Plant' is the single contributor variable and
# 'uptake' the outcome variable.
study_design = {
    'study type': 'observational study',
    'contributor variables': 'Plant',
    'outcome variables': 'uptake'
}

# Only the significance level is asserted.
assumptions = {
    'Type I (False Positive) Error Rate': 0.05,
}

# NOTE(review): `data_path` and `variables` are not defined in this excerpt;
# presumably they are set earlier in the script -- confirm.
tea.data(data_path, key='Id')
tea.define_variables(variables)
tea.define_study_design(study_design)
tea.assume(assumptions)
# Two pairwise comparisons between 'Plant' categories are passed in a single
# hypothesize call.
tea.hypothesize(['Plant', 'uptake'], ['Plant: Qn1 < Qn2', 'Plant: Qc2 < Qc3'])
'''
Results:
--------------
Test: kruskall_wallis
***Test assumptions:
Independent (not paired) observations: Plant
Exactly one explanatory variable: Plant
Exactly one explained variable: uptake
Continuous (not categorical) data: uptake
Variable is categorical: Plant
Variable has two or more categories: Plant

***Test results:
name = Kruskall Wallis
test_statistic = 6.89813

示例#21

0

显示文件

文件： ar_tv_tea.py 项目： pkuleon/tea-lang

    'categories': [1, 2, 3, 4, 5]
}]

# Experiment design: 'Condition' is the independent variable and 'Score' the
# dependent variable; no within/between-subjects key is given.
experimental_design = {
    'study type': 'experiment',
    'independent variables': 'Condition',
    'dependent variables': 'Score'
}

# Significance level used for this analysis.
assumptions = {'Type I (False Positive) Error Rate': 0.01969}

# NOTE(review): `data_path` is not defined in this excerpt and only the tail
# of `variables` is visible above -- confirm both are set earlier.
tea.data(data_path, key='ID')
tea.define_variables(variables)
tea.define_study_design(experimental_design)
tea.assume(assumptions)
# The value returned by hypothesize is kept in `results`.
results = tea.hypothesize(['Score', 'Condition'], ['Condition:AR > TV'])
'''
Results:
--------------
Test: mannwhitney_u
***Test assumptions:
Exactly one explanatory variable: Condition
Exactly one explained variable: Score
Independent (not paired) observations: Condition
Variable is categorical: Condition
Variable has two categories: Condition
Continuous OR ORDINAL (not nominal) data: Score

***Test results:
name = Mann Whitney U Test
test_statistic = 442.50000

示例#22

0

显示文件

文件： crime_tea.py 项目： pkuleon/tea-lang

    'contributor variables': ['So', 'Prob'],
    'outcome variables': ['Prob', 'Ineq']
}

# Only the significance level is asserted; the equal-variance and
# group-normality assumptions are left commented out.
assumptions = {
    # 'equal variance': [['So', 'Ineq']],
    # 'groups normally distributed': [['So', 'Prob']],
    'Type I (False Positive) Error Rate': 0.05
}

# NOTE(review): no tea.data(...) call is visible in this excerpt, and
# `variables` / `study_design` are defined above the visible portion --
# confirm the data is loaded earlier in the script.
tea.define_variables(variables)
tea.define_study_design(study_design)
tea.assume(assumptions)
# The two 'So' group comparisons are disabled; only the hypothesis between
# 'Ineq' and 'Prob' is run.
# tea.hypothesize(['So', 'Ineq'], ['So:1 > 0'])
# tea.hypothesize(['So', 'Prob'], ['So:1 > 0'])
tea.hypothesize(['Ineq', 'Prob'], ['Ineq ~ -Prob'])
'''
Results:
--------------
Test: kendalltau_corr
***Test assumptions:
Exactly two variables involved in analysis: Prob, Ineq
Continuous OR ORDINAL (not nominal) data: Prob
Continuous OR ORDINAL (not nominal) data: Ineq

***Test results:
name = Kendall's Tau Correlation
test_statistic = 0.39611
p_value = 0.00009
adjusted_p_value = 0.00009
alpha = 0.05