def test_pairwise_tukey(self):
    """Check the Tukey post-hoc p-values computed on the 'anova' dataset."""
    pain = read_dataset('anova')
    posthoc = pairwise_tukey(dv='Pain threshold', between='Hair color',
                             data=pain)
    expected_pvals = [0.074, 0.435, 0.415, 0.004, 0.789, 0.037]
    observed_pvals = posthoc.loc[:, 'p-tukey'].values.round(3)
    # Deliberately loose comparison: an absolute tolerance of 0.05 is used.
    assert np.allclose(expected_pvals, observed_pvals, atol=0.05)
def test_circ_axial(self):
    """Check circ_axial on the 'Orientation' column of the circular data."""
    orientation_deg = read_dataset('circular')['Orientation'].values
    # Angles are converted to radians before the axial correction (factor 2).
    corrected = circ_axial(np.deg2rad(orientation_deg), 2)
    expected = [0, 0.7854, 1.5708, 2.3562, 3.1416, 3.9270, 4.7124, 5.4978]
    assert np.allclose(np.round(corrected, 4), expected)
def test_rmcorr(self):
    """Check rm_corr on the 'rm_corr' dataset."""
    data = read_dataset('rm_corr')
    # Expected values are tested against the rmcorr R package.
    result = rm_corr(data=data, x='pH', y='PacO2', subject='Subject')
    r, p, dof = result
    assert r == -0.507
    assert dof == 38
    assert np.round(p, 3) == 0.001
def test_cochran(self):
    """Check cochran: reference Q value, CSV export, and NaN input."""
    from pingouin.datasets import read_dataset
    data = read_dataset('cochran')
    # Arguments shared by every call below.
    design = dict(dv='Energetic', within='Time', subject='Subject')

    result = cochran(data=data, **design)
    assert result.loc['cochran', 'Q'] == 6.706

    # Same analysis, this time also exporting the result to a CSV file.
    cochran(data=data, export_filename='test_export.csv', **design)

    # Insert a missing value and make sure the function still runs.
    data.loc[2, 'Energetic'] = np.nan
    cochran(data=data, **design)
def test_welch_anova(self):
    """Check welch_anova on the pain ('anova') dataset."""
    pain = read_dataset('anova')
    result = welch_anova(dv='Pain threshold', between='Hair color',
                         data=pain, export_filename='test_export.csv')
    # Reference values come from R's oneway.test function.
    ddof1 = result.loc[0, 'ddof1']
    ddof2 = result.loc[0, 'ddof2']
    f_value = result.loc[0, 'F']
    p_value = result.loc[0, 'p-unc']
    assert ddof1 == 3
    assert np.allclose(ddof2, 8.330)
    assert np.allclose(f_value, 5.890)
    assert np.allclose(np.round(p_value, 4), .0188)
def test_pairwise_gameshowell(self):
    """Check pairwise_gameshowell on the 'anova' dataset."""
    def _flag(pval):
        # Label a p-value as significant at the 0.05 threshold.
        return 'Yes' if pval < 0.05 else 'No'

    pain = read_dataset('anova')
    posthoc = pairwise_gameshowell(dv='Pain threshold', between='Hair color',
                                   data=pain)
    # Reference values come from the R package `userfriendlyscience`.
    abs_t = np.abs(posthoc['T'].round(2))
    np.testing.assert_array_equal(abs_t,
                                  [2.48, 1.42, 1.75, 4.09, 1.11, 3.56])
    dof = posthoc['df'].round(2)
    np.testing.assert_array_equal(dof, [7.91, 7.94, 6.56, 8.0, 6.82, 6.77])
    significant = posthoc['pval'].apply(_flag).values
    np.testing.assert_array_equal(significant,
                                  ['No', 'No', 'No', 'Yes', 'No', 'Yes'])
def test_pairwise_ttests(self):
    """Exercise pairwise_ttests over its supported designs and error paths.

    Most calls are smoke tests: they only check that the function runs
    (or raises ``ValueError``) for a given combination of arguments.

    The ``df`` fixture comes from the enclosing scope and is shared with
    other tests, so every in-place modification is applied to a local copy
    rather than to ``df`` itself. This keeps the outcome of the other tests
    independent of the order in which the tests run.
    """
    # Within + Between + Within * Between
    pairwise_ttests(dv='Scores', within='Time', between='Group',
                    subject='Subject', data=df, alpha=.01)
    pairwise_ttests(dv='Scores', within=['Time'], between=['Group'],
                    subject='Subject', data=df, padjust='fdr_bh',
                    return_desc=True)
    # Simple within
    pairwise_ttests(dv='Scores', within='Time', subject='Subject',
                    data=df, return_desc=True)
    # Simple between
    pairwise_ttests(dv='Scores', between='Group', data=df, padjust='bonf',
                    tail='one-sided', effsize='cohen',
                    export_filename='test_export.csv')
    # Two between factors
    pairwise_ttests(dv='Scores', between=['Time', 'Group'], data=df,
                    padjust='holm')
    # Two within subject factors
    pairwise_ttests(dv='Scores', within=['Group', 'Time'],
                    subject='Subject', data=df, padjust='bonf')
    # Wrong tail argument
    with pytest.raises(ValueError):
        pairwise_ttests(dv='Scores', between='Group', data=df,
                        tail='wrong')
    # Wrong alpha argument
    with pytest.raises(ValueError):
        pairwise_ttests(dv='Scores', between='Group', data=df,
                        alpha='.05')
    # Both multiple between and multiple within
    with pytest.raises(ValueError):
        pairwise_ttests(dv='Scores', between=['Time', 'Group'],
                        within=['Time', 'Group'], subject='Subject',
                        data=df)
    # Missing values: mutate a private copy, never the shared fixture.
    df_local = df.copy()
    df_local.iloc[[10, 15], 0] = np.nan
    pairwise_ttests(dv='Scores', within='Time', subject='Subject',
                    data=df_local)
    # Wrong input argument: 'Group' is overwritten with a single value.
    # The same copy is reused so it still carries the NaN values above.
    df_local['Group'] = 'Control'
    with pytest.raises(ValueError):
        pairwise_ttests(dv='Scores', between='Group', data=df_local)
    # Two within factors from other datasets and with NaN values
    df2 = read_dataset('rm_anova')
    pairwise_ttests(dv='DesireToKill',
                    within=['Disgustingness', 'Frighteningness'],
                    subject='Subject', padjust='holm', data=df2)
def test_ancova(self):
    """Check ancova on the 'ancova' dataset."""
    data = read_dataset('ancova')
    result = ancova(data=data, dv='Scores', covar='Income',
                    between='Method')
    # Reference F-values come from statsmodels.
    f_first = result.loc[0, 'F'].round(3)
    f_second = result.loc[1, 'F'].round(3)
    assert np.allclose(f_first, 3.336)
    assert np.allclose(f_second, 29.419)
    # With return_bw=True the call must unpack into exactly two objects.
    result, bw = ancova(data=data, dv='Scores', covar='Income',
                        between='Method',
                        export_filename='test_export.csv', return_bw=True)
    # Covariates may also be given as a list (one or several entries).
    for covariates in (['Income'], ['Income', 'BMI']):
        ancova(data=data, dv='Scores', covar=covariates, between='Method')
def test_intraclass_corr(self):
    """Check intraclass_corr: reference values and invalid inputs."""
    wine = read_dataset('icc')
    # Smoke test with a non-default confidence level.
    intraclass_corr(wine, 'Wine', 'Judge', 'Scores', ci=.68)

    icc, ci = intraclass_corr(wine, 'Wine', 'Judge', 'Scores')
    lower, upper = ci[0], ci[1]
    assert np.round(icc, 3) == 0.728
    assert lower == .434
    assert upper == .927

    # Each of these argument sets must be rejected with a ValueError.
    invalid_calls = [
        (wine, None, 'Judge', 'Scores'),
        ('error', 'Wine', 'Judge', 'Scores'),
        (wine, 'Wine', 'Judge', 'Judge'),
        (wine.drop(index=0), 'Wine', 'Judge', 'Scores'),
    ]
    for args in invalid_calls:
        with pytest.raises(ValueError):
            intraclass_corr(*args)
def test_pairwise_corr(self):
    """Check pairwise_corr: reference values and many argument variants."""
    # JASP Big 5 dataset; the first column (subject) is dropped.
    data = read_dataset('pairwise_corr').iloc[:, 1:]
    result = pairwise_corr(data=data, method='pearson', tail='two-sided')
    jasp_rval = [
        -0.350, -0.01, -.134, -.368, .267,
        .055, .065, .159, -.013, .159,
    ]
    assert np.allclose(result['r'].values, jasp_rval)
    assert result['n'].values[0] == 500

    # Multiple-comparison correction.
    pairwise_corr(data=data, method='spearman', tail='one-sided',
                  padjust='bonf')
    # Export to CSV.
    pairwise_corr(data=data, method='spearman', tail='one-sided',
                  export_filename='test_export.csv')
    # Restrict the analysis to a subset of columns.
    pairwise_corr(data=data, columns=['Neuroticism', 'Extraversion'])
    # Invalid tail argument.
    with pytest.raises(ValueError):
        pairwise_corr(data=data, tail='wrong')

    # A non-numeric column must not break the call.
    data['test'] = 'test'
    pairwise_corr(data=data, method='pearson')

    # Extra columns used by the product / combination variants below.
    n = data.shape[0]
    data['Age'] = np.random.randint(18, 65, n)
    data['IQ'] = np.random.normal(105, 1, n)
    data['Gender'] = np.repeat(['M', 'F'], int(n / 2))
    column_specs = [
        ['Neuroticism', 'Gender'],
        ['Neuroticism', 'Extraversion', 'Gender'],
        ['Neuroticism'],
        'Neuroticism',
        [['Neuroticism']],
        [['Neuroticism'], None],
        [['Neuroticism', 'Gender'], ['Age']],
        [['Neuroticism'], ['Age', 'IQ']],
        [['Age', 'IQ'], []],
        ['Age', 'Gender', 'IQ', 'Wrong'],
        ['Age', 'Gender', 'Wrong'],
    ]
    for spec in column_specs:
        pairwise_corr(data, columns=spec)

    # Stack the frame three times row-wise; BF10 must then be absent
    # from the output.
    data1500 = pd.concat([data, data, data], ignore_index=True)
    pcor1500 = pairwise_corr(data1500, method='pearson')
    assert 'BF10' not in pcor1500.keys()
def test_ancovan(self):
    """Check ancovan on the 'ancova' dataset."""
    data = read_dataset('ancova')
    two_covariates = ['Income', 'BMI']
    result = ancovan(data=data, dv='Scores', covar=two_covariates,
                     between='Method')
    # Reference F-values come from statsmodels.
    assert np.allclose(result.loc[0, 'F'], 3.233)
    assert np.allclose(result.loc[1, 'F'], 27.637)
    # Same model, with CSV export.
    ancovan(data=data, dv='Scores', covar=two_covariates,
            between='Method', export_filename='test_export.csv')
    # A single covariate, given as a list and then as a plain string.
    for covariate in (['Income'], 'Income'):
        ancovan(data=data, dv='Scores', covar=covariate, between='Method')
def test_corr(self):
    """Check corr: every method, invalid input, and reference values."""
    np.random.seed(123)
    mean = [4, 6]
    cov = [(1, .6), (.6, 1)]
    sample = np.random.multivariate_normal(mean, cov, 30)
    x, y = sample.T
    # Overwrite one value in each variable with an extreme value.
    x[3] = 12
    y[5] = -8

    # Smoke tests over several method / tail combinations.
    smoke_calls = (
        {'method': 'pearson', 'tail': 'one-sided'},
        {'method': 'spearman', 'tail': 'two-sided'},
        {'method': 'kendall'},
        {'method': 'shepherd', 'tail': 'two-sided'},
    )
    for kwargs in smoke_calls:
        corr(x, y, **kwargs)

    # Compare with robust corr toolbox
    skipped_stats = corr(x, y, method='skipped')
    assert np.round(skipped_stats['r'].values, 3) == 0.512
    assert skipped_stats['outliers'].values == 2
    shepherd_stats = corr(x, y, method='shepherd')
    assert shepherd_stats['outliers'].values == 2
    _, _, outliers = skipped(x, y, method='pearson')
    assert outliers.size == x.size
    assert shepherd_stats['n'].values == 30
    percbend_stats = corr(x, y, method='percbend')
    assert np.round(percbend_stats['r'].values, 3) == 0.484

    # Second variable drawn from a uniform (non-normal) distribution.
    z = np.random.uniform(size=30)
    corr(x, z, method='pearson')

    # Input containing a NaN value.
    x[3] = np.nan
    corr(x, y)

    # Invalid arguments: unknown method, then arrays of unequal length.
    with pytest.raises(ValueError):
        corr(x, y, method='error')
    with pytest.raises(ValueError):
        corr(x, y[:-10])

    # Compare the Bayes factor with JASP.
    big5 = read_dataset('pairwise_corr')
    jasp_stats = corr(big5['Neuroticism'], big5['Extraversion'])
    assert np.isclose(1 / jasp_stats['BF10'].values, 1.478e-13)

    # With 1500 observations BF10 must be absent from the output.
    xx, yy = np.random.multivariate_normal(mean, cov, 1500).T
    c1500 = corr(xx, yy)
    assert 'BF10' not in c1500.keys()
def test_anova(self):
    """Check anova / anova2: reference values and two-way smoke tests."""
    # One-way ANOVA on the pain dataset.
    pain = read_dataset('anova')
    result = anova(dv='Pain threshold', between='Hair color', data=pain,
                   detailed=True, export_filename='test_export.csv')
    # The between factor may also be passed as a one-element list.
    anova(dv='Pain threshold', between=['Hair color'], data=pain)
    # Reference values come from JASP.
    f_value = result.loc[0, 'F']
    p_value = result.loc[0, 'p-unc']
    eta_sq = result.loc[0, 'np2']
    assert np.allclose(f_value, 6.791)
    assert np.allclose(np.round(p_value, 3), .004)
    assert np.allclose(eta_sq, .576)
    # Two-way ANOVA on the `df` fixture defined outside this function.
    anova(dv='Scores', between=['Group', 'Time'], data=df,
          export_filename='test_export.csv')
    for factors in (['Group', 'Time'], ['Group'], 'Group'):
        anova2(dv='Scores', between=factors, data=df)
import pandas as pd
import numpy as np
from unittest import TestCase
from pingouin.parametric import (ttest, anova, anova2, rm_anova, mixed_anova,
                                 rm_anova2, ancova, welch_anova, ancovan)
from pingouin.datasets import read_dataset

# Dataset used by the ANOVA tests, plus a copy in which two cells of the
# first column are replaced by NaN.
df = read_dataset('mixed_anova.csv')
df_nan = df.copy()
df_nan.iloc[[4, 15], 0] = np.nan

# Two seeded samples of 100 normal draws each, with different scales.
np.random.seed(1234)
x = np.random.normal(scale=1., size=100)
y = np.random.normal(scale=0.8, size=100)


class TestParametric(TestCase):
    """Test parametric.py."""

    def test_ttest(self):
        """Check ttest: one-sample, paired and independent variants."""
        # NOTE: `h` is not used below; the draw is kept because it
        # advances the global NumPy random state.
        h = np.random.normal(scale=0.9, size=95)
        # One-sample test against a constant.
        ttest(x, 0.5)
        # Paired one-sided test; reference values come from JASP.
        paired = ttest(x, y, paired=True, tail='one-sided')
        t_value = paired.loc['T-test', 'T']
        p_value = paired.loc['T-test', 'p-val'].round(3)
        assert np.allclose(t_value, 0.616)
        assert np.allclose(p_value, .270)
        # Independent samples with the automatic correction setting.
        ttest(x, y, paired=False, correction='auto')
import pytest import numpy as np from numpy.testing import assert_almost_equal, assert_equal from unittest import TestCase from pingouin.regression import (linear_regression, logistic_regression, mediation_analysis) from pingouin.datasets import read_dataset from scipy.stats import linregress from sklearn.linear_model import LinearRegression df = read_dataset('mediation') class TestRegression(TestCase): """Test regression.py.""" def test_linear_regression(self): """Test function linear_regression.""" # Simple regression lm = linear_regression(df['X'], df['Y']) # Pingouin sc = linregress(df['X'].values, df['Y'].values) # SciPy assert_equal(lm['names'].values, ['Intercept', 'X']) assert_almost_equal(lm['coef'][1], sc.slope) assert_almost_equal(lm['coef'][0], sc.intercept) assert_almost_equal(lm['se'][1], sc.stderr) assert_almost_equal(lm['pval'][1], sc.pvalue) assert_almost_equal(np.sqrt(lm['r2'][0]), sc.rvalue) # Multiple regression with intercept X = df[['X', 'M']].values