Python read_dataset примеры, pingouin.datasets.read_dataset Python примеры использования

Пример #1

0

Показать файл

Файл: test_pairwise.py Проект: heyifei1984/pingouin

 def test_pairwise_tukey(self):
     """Compare the 'p-tukey' column of pairwise_tukey with reference values.

     Uses the 'anova' dataset with 'Pain threshold' as the dependent
     variable and 'Hair color' as the between-subject factor.
     """
     pain = read_dataset('anova')
     result = pairwise_tukey(data=pain, dv='Pain threshold',
                             between='Hair color')
     reference_p = [0.074, 0.435, 0.415, 0.004, 0.789, 0.037]
     observed_p = result.loc[:, 'p-tukey'].values.round(3)
     # The comparison uses a loose absolute tolerance of 0.05.
     assert np.allclose(reference_p, observed_p, atol=0.05)

Пример #2

0

Показать файл

 def test_circ_axial(self):
     """Check circ_axial on the 'Orientation' column of the 'circular' data.

     The orientations are converted from degrees to radians before the
     call, and the result is compared after rounding to 4 decimals.
     """
     orientation_deg = read_dataset('circular')['Orientation'].values
     axial = circ_axial(np.deg2rad(orientation_deg), 2)
     expected = [0, 0.7854, 1.5708, 2.3562, 3.1416, 3.9270, 4.7124, 5.4978]
     assert np.allclose(np.round(axial, 4), expected)

Пример #3

0

Показать файл

Файл: test_correlation.py Проект: heyifei1984/pingouin

 def test_rmcorr(self):
     """Check rm_corr on the 'rm_corr' dataset against reference values."""
     data = read_dataset('rm_corr')
     # Reference values: tested against the rmcorr R package.
     outcome = rm_corr(data=data, x='pH', y='PacO2', subject='Subject')
     r, p, dof = outcome
     assert r == -0.507
     assert dof == 38
     assert np.round(p, 3) == 0.001

Пример #4

0

Показать файл

Файл: test_nonparametric.py Проект: heyifei1984/pingouin

 def test_cochran(self):
     """Exercise cochran: reference Q value, CSV export and a NaN entry."""
     from pingouin.datasets import read_dataset
     data = read_dataset('cochran')
     # Column arguments shared by every call below.
     columns = dict(dv='Energetic', within='Time', subject='Subject')
     result = cochran(data=data, **columns)
     assert result.loc['cochran', 'Q'] == 6.706
     cochran(data=data, export_filename='test_export.csv', **columns)
     # Same call again after one value has been replaced by NaN
     data.loc[2, 'Energetic'] = np.nan
     cochran(data=data, **columns)

Пример #5

0

Показать файл

Файл: test_parametric.py Проект: heyifei1984/pingouin

 def test_welch_anova(self):
     """Check welch_anova on the pain dataset against R oneway.test values."""
     pain = read_dataset('anova')
     table = welch_anova(data=pain, dv='Pain threshold',
                         between='Hair color',
                         export_filename='test_export.csv')
     # Reference values come from the R oneway.test function
     assert table.loc[0, 'ddof1'] == 3
     for column, reference in (('ddof2', 8.330), ('F', 5.890)):
         assert np.allclose(table.loc[0, column], reference)
     p_rounded = np.round(table.loc[0, 'p-unc'], 4)
     assert np.allclose(p_rounded, .0188)

Пример #6

0

Показать файл

Файл: test_pairwise.py Проект: heyifei1984/pingouin

 def test_pairwise_gameshowell(self):
     """Check pairwise_gameshowell T, df and significance on the pain data."""
     pain = read_dataset('anova')
     result = pairwise_gameshowell(data=pain, dv='Pain threshold',
                                   between='Hair color')
     # Reference values come from the R package `userfriendlyscience`
     abs_t = np.abs(result['T'].round(2))
     np.testing.assert_array_equal(abs_t,
                                   [2.48, 1.42, 1.75, 4.09, 1.11, 3.56])
     dof = result['df'].round(2)
     np.testing.assert_array_equal(dof,
                                   [7.91, 7.94, 6.56, 8.0, 6.82, 6.77])
     # Label each comparison by whether its p-value is below 0.05
     significant = ['Yes' if p < 0.05 else 'No' for p in result['pval']]
     np.testing.assert_array_equal(significant,
                                   ['No', 'No', 'No', 'Yes', 'No', 'Yes'])

Пример #7

0

Показать файл

Файл: test_pairwise.py Проект: heyifei1984/pingouin

    def test_pairwise_ttests(self):
        """Test function pairwise_ttests.

        Runs pairwise_ttests over several within/between designs, checks
        that invalid arguments raise ValueError, and exercises inputs with
        missing values.

        The module-level ``df`` is only read here: the steps that inject
        NaN values and overwrite the 'Group' column operate on a local
        copy, so other tests that use ``df`` do not depend on whether this
        test has already run.
        """
        # Within + Between + Within * Between
        pairwise_ttests(dv='Scores', within='Time', between='Group',
                        subject='Subject', data=df, alpha=.01)
        pairwise_ttests(dv='Scores', within=['Time'], between=['Group'],
                        subject='Subject', data=df, padjust='fdr_bh',
                        return_desc=True)
        # Simple within
        pairwise_ttests(dv='Scores', within='Time', subject='Subject',
                        data=df, return_desc=True)
        # Simple between
        pairwise_ttests(dv='Scores', between='Group',
                        data=df, padjust='bonf', tail='one-sided',
                        effsize='cohen', export_filename='test_export.csv')

        # Two between factors
        pairwise_ttests(dv='Scores', between=['Time', 'Group'], data=df,
                        padjust='holm')

        # Two within subject factors
        pairwise_ttests(dv='Scores', within=['Group', 'Time'],
                        subject='Subject', data=df, padjust='bonf')

        # Wrong tail argument
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores', between='Group', data=df,
                            tail='wrong')
        # Wrong alpha argument
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores', between='Group', data=df, alpha='.05')

        # Both multiple between and multiple within
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores', between=['Time', 'Group'],
                            within=['Time', 'Group'], subject='Subject',
                            data=df)

        # Missing values: mutate a private copy, never the shared ``df``
        df_local = df.copy()
        df_local.iloc[[10, 15], 0] = np.nan
        pairwise_ttests(dv='Scores', within='Time', subject='Subject',
                        data=df_local)
        # Wrong input argument: 'Group' holds a single value
        df_local['Group'] = 'Control'
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores', between='Group', data=df_local)

        # Two within factors from other datasets and with NaN values
        df2 = read_dataset('rm_anova')
        pairwise_ttests(dv='DesireToKill',
                        within=['Disgustingness', 'Frighteningness'],
                        subject='Subject', padjust='holm', data=df2)

Пример #8

0

Показать файл

Файл: test_parametric.py Проект: heyifei1984/pingouin

 def test_ancova(self):
     """Check ancova F-values against statsmodels and run call variants."""
     data = read_dataset('ancova')
     table = ancova(data=data, dv='Scores', covar='Income',
                    between='Method')
     # Reference F-values come from statsmodels
     for row, f_reference in enumerate((3.336, 29.419)):
         assert np.allclose(table.loc[row, 'F'].round(3), f_reference)
     # With return_bw=True the result is unpacked into two values
     _table, _bw = ancova(data=data, dv='Scores', covar='Income',
                          between='Method',
                          export_filename='test_export.csv',
                          return_bw=True)
     # Covariates given as a list of one, then two, column names
     for covariates in (['Income'], ['Income', 'BMI']):
         ancova(data=data, dv='Scores', covar=covariates, between='Method')

Пример #9

0

Показать файл

Файл: test_correlation.py Проект: heyifei1984/pingouin

 def test_intraclass_corr(self):
     """Check intraclass_corr values and its ValueError cases on 'icc'."""
     data = read_dataset('icc')
     intraclass_corr(data, 'Wine', 'Judge', 'Scores', ci=.68)
     icc, ci = intraclass_corr(data, 'Wine', 'Judge', 'Scores')
     assert np.round(icc, 3) == 0.728
     assert ci[0] == .434
     assert ci[1] == .927
     # Each of these positional argument sets must raise ValueError
     invalid_calls = (
         (data, None, 'Judge', 'Scores'),
         ('error', 'Wine', 'Judge', 'Scores'),
         (data, 'Wine', 'Judge', 'Judge'),
         (data.drop(index=0), 'Wine', 'Judge', 'Scores'),
     )
     for args in invalid_calls:
         with pytest.raises(ValueError):
             intraclass_corr(*args)

Пример #10

0

Показать файл

Файл: test_pairwise.py Проект: medic20/pingouin

 def test_pairwise_corr(self):
     """Test function pairwise_corr.

     Compares Pearson r values with JASP reference values, then runs
     pairwise_corr with different methods, tails, p-value adjustment,
     export, and many shapes of the ``columns`` argument. ``data`` is
     modified in place along the way (extra columns are added), so the
     order of the statements matters.
     """
     # Load JASP Big 5 DataSets (remove subject column)
     data = read_dataset('pairwise_corr').iloc[:, 1:]
     stats = pairwise_corr(data=data, method='pearson', tail='two-sided')
     jasp_rval = [
         -0.350, -0.01, -.134, -.368, .267, .055, .065, .159, -.013, .159
     ]
     assert np.allclose(stats['r'].values, jasp_rval)
     assert stats['n'].values[0] == 500
     # Correct for multiple comparisons
     pairwise_corr(data=data,
                   method='spearman',
                   tail='one-sided',
                   padjust='bonf')
     # Export
     pairwise_corr(data=data,
                   method='spearman',
                   tail='one-sided',
                   export_filename='test_export.csv')
     # Check with a subset of columns
     pairwise_corr(data=data, columns=['Neuroticism', 'Extraversion'])
     with pytest.raises(ValueError):
         pairwise_corr(data=data, tail='wrong')
     # Check with non-numeric columns
     data['test'] = 'test'
     pairwise_corr(data=data, method='pearson')
     # Check different variation of product / combination
     # NOTE(review): these draws are not seeded in this block.
     n = data.shape[0]
     data['Age'] = np.random.randint(18, 65, n)
     data['IQ'] = np.random.normal(105, 1, n)
     data['Gender'] = np.repeat(['M', 'F'], int(n / 2))
     pairwise_corr(data, columns=['Neuroticism', 'Gender'])
     pairwise_corr(data, columns=['Neuroticism', 'Extraversion', 'Gender'])
     pairwise_corr(data, columns=['Neuroticism'])
     pairwise_corr(data, columns='Neuroticism')
     pairwise_corr(data, columns=[['Neuroticism']])
     pairwise_corr(data, columns=[['Neuroticism'], None])
     pairwise_corr(data, columns=[['Neuroticism', 'Gender'], ['Age']])
     pairwise_corr(data, columns=[['Neuroticism'], ['Age', 'IQ']])
     pairwise_corr(data, columns=[['Age', 'IQ'], []])
     pairwise_corr(data, columns=['Age', 'Gender', 'IQ', 'Wrong'])
     pairwise_corr(data, columns=['Age', 'Gender', 'Wrong'])
     # Test with a larger sample: three copies of data stacked row-wise.
     # The result is expected to have no 'BF10' entry
     # (presumably BF10 is skipped above 1000 observations -- confirm).
     data1500 = pd.concat([data, data, data], ignore_index=True)
     pcor1500 = pairwise_corr(data1500, method='pearson')
     assert 'BF10' not in pcor1500.keys()

Пример #11

0

Показать файл

Файл: test_parametric.py Проект: heyifei1984/pingouin

 def test_ancovan(self):
     """Check ancovan F-values against statsmodels and run call variants."""
     data = read_dataset('ancova')
     table = ancovan(data=data, dv='Scores', covar=['Income', 'BMI'],
                     between='Method')
     # Reference F-values come from statsmodels
     for row, f_reference in enumerate((3.233, 27.637)):
         assert np.allclose(table.loc[row, 'F'], f_reference)
     # Same model again, this time with a CSV export
     ancovan(data=data, dv='Scores', covar=['Income', 'BMI'],
             between='Method', export_filename='test_export.csv')
     # Single covariate, given as a one-element list and as a string
     for covariate in (['Income'], 'Income'):
         ancovan(data=data, dv='Scores', covar=covariate, between='Method')

Пример #12

0

Показать файл

Файл: test_correlation.py Проект: heyifei1984/pingouin

 def test_corr(self):
     """Test function corr.

     Draws seeded bivariate normal data, injects two extreme values, and
     runs corr with several methods. Selected outputs are compared with
     reference values (robust corr toolbox, JASP). The statements consume
     the seeded random stream in order, so their sequence matters.
     """
     np.random.seed(123)
     mean, cov = [4, 6], [(1, .6), (.6, 1)]
     x, y = np.random.multivariate_normal(mean, cov, 30).T
     # Plant one extreme value in each variable
     x[3], y[5] = 12, -8
     corr(x, y, method='pearson', tail='one-sided')
     corr(x, y, method='spearman', tail='two-sided')
     corr(x, y, method='kendall')
     corr(x, y, method='shepherd', tail='two-sided')
     # Compare with robust corr toolbox
     stats = corr(x, y, method='skipped')
     assert np.round(stats['r'].values, 3) == 0.512
     assert stats['outliers'].values == 2
     stats = corr(x, y, method='shepherd')
     assert stats['outliers'].values == 2
     _, _, outliers = skipped(x, y, method='pearson')
     assert outliers.size == x.size
     assert stats['n'].values == 30
     stats = corr(x, y, method='percbend')
     assert np.round(stats['r'].values, 3) == 0.484
     # Not normally distributed
     z = np.random.uniform(size=30)
     corr(x, z, method='pearson')
     # With NaN values
     x[3] = np.nan
     corr(x, y)
     # Wrong argument
     with pytest.raises(ValueError):
         corr(x, y, method='error')
     # Mismatched lengths
     with pytest.raises(ValueError):
         corr(x, y[:-10])
     # Compare with JASP
     df = read_dataset('pairwise_corr')
     stats = corr(df['Neuroticism'], df['Extraversion'])
     assert np.isclose(1 / stats['BF10'].values, 1.478e-13)
     # With 1500 samples the output is expected to have no 'BF10' entry
     # (presumably BF10 is skipped above 1000 observations -- confirm)
     xx, yy = np.random.multivariate_normal(mean, cov, 1500).T
     c1500 = corr(xx, yy)
     assert 'BF10' not in c1500.keys()

Пример #13

0

Показать файл

Файл: test_parametric.py Проект: heyifei1984/pingouin

 def test_anova(self):
     """Check one-way anova against JASP and run two-way variants."""
     # One-way ANOVA on the pain dataset
     pain = read_dataset('anova')
     table = anova(data=pain, dv='Pain threshold', between='Hair color',
                   detailed=True, export_filename='test_export.csv')
     anova(data=pain, dv='Pain threshold', between=['Hair color'])
     # Reference values come from JASP
     assert np.allclose(table.loc[0, 'F'], 6.791)
     p_rounded = np.round(table.loc[0, 'p-unc'], 3)
     assert np.allclose(p_rounded, .004)
     assert np.allclose(table.loc[0, 'np2'], .576)
     # Two-way ANOVA on the module-level ``df``
     anova(data=df, dv='Scores', between=['Group', 'Time'],
           export_filename='test_export.csv')
     # anova2 with two factors, a one-element list, and a plain string
     for factors in (['Group', 'Time'], ['Group'], 'Group'):
         anova2(dv='Scores', between=factors, data=df)

Пример #14

0

Показать файл

Файл: test_parametric.py Проект: heyifei1984/pingouin

import pandas as pd
import numpy as np

from unittest import TestCase
from pingouin.parametric import (ttest, anova, anova2, rm_anova, mixed_anova,
                                 rm_anova2, ancova, welch_anova, ancovan)
from pingouin.datasets import read_dataset

# Load the 'mixed_anova.csv' dataset (module-level fixture named ``df``)
df = read_dataset('mixed_anova.csv')

# Copy of the dataset with rows 4 and 15 of the first column set to NaN
df_nan = df.copy()
df_nan.iloc[[4, 15], 0] = np.nan

# Create random normal variables (seeded so the draws are reproducible)
np.random.seed(1234)
x = np.random.normal(scale=1., size=100)
y = np.random.normal(scale=0.8, size=100)


class TestParametric(TestCase):
    """Tests for parametric.py."""

    def test_ttest(self):
        """Run ttest in several configurations; compare one with JASP."""
        # Drawn but not used below; the call is kept so the global
        # random stream advances exactly as before.
        _h = np.random.normal(scale=0.9, size=95)
        ttest(x, 0.5)
        paired = ttest(x, y, paired=True, tail='one-sided')
        # Reference values come from JASP
        row = 'T-test'
        assert np.allclose(paired.loc[row, 'T'], 0.616)
        assert np.allclose(paired.loc[row, 'p-val'].round(3), .270)
        ttest(x, y, paired=False, correction='auto')

Пример #15

0

Показать файл

Файл: test_regression.py Проект: heyifei1984/pingouin

import pytest
import numpy as np
from numpy.testing import assert_almost_equal, assert_equal
from unittest import TestCase
from pingouin.regression import (linear_regression, logistic_regression,
                                 mediation_analysis)
from pingouin.datasets import read_dataset

from scipy.stats import linregress
from sklearn.linear_model import LinearRegression

# Module-level 'mediation' dataset used by the tests below
df = read_dataset('mediation')


class TestRegression(TestCase):
    """Test regression.py."""
    def test_linear_regression(self):
        """Test function linear_regression.

        Fits a simple regression of 'Y' on 'X' with linear_regression and
        compares names, coefficients, standard error, p-value and r with
        scipy.stats.linregress.
        """

        # Simple regression
        lm = linear_regression(df['X'], df['Y'])  # Pingouin
        sc = linregress(df['X'].values, df['Y'].values)  # SciPy
        assert_equal(lm['names'].values, ['Intercept', 'X'])
        # Index 1 is compared with the slope, index 0 with the intercept
        assert_almost_equal(lm['coef'][1], sc.slope)
        assert_almost_equal(lm['coef'][0], sc.intercept)
        assert_almost_equal(lm['se'][1], sc.stderr)
        assert_almost_equal(lm['pval'][1], sc.pvalue)
        # Square root of r2 is compared with SciPy's rvalue
        assert_almost_equal(np.sqrt(lm['r2'][0]), sc.rvalue)

        # Multiple regression with intercept
        # NOTE(review): X is assigned but not used in the visible code;
        # the snippet may be truncated here -- confirm against the file.
        X = df[['X', 'M']].values

Python read_dataset примеры использования