def test_pairwise_corr(self):
    """Exercise pairwise_corr on the Big 5 dataset and on MultiIndex data.

    The checks run sequentially on a dataframe that is mutated along the
    way (extra columns, then missing values), so statement order matters.
    """
    # JASP Big 5 dataset; the first column (subject) is dropped.
    df = read_dataset('pairwise_corr').iloc[:, 1:]
    res = pairwise_corr(data=df, method='pearson', tail='two-sided')
    expected_r = [
        -0.350, -0.01, -.134, -.368, .267,
        .055, .065, .159, -.013, .159,
    ]
    assert np.allclose(res['r'].values, expected_r)
    assert res['n'].values[0] == 500

    # Multiple-comparison correction.
    pairwise_corr(data=df, method='spearman', tail='one-sided',
                  padjust='bonf')

    # Export to a CSV file.
    pairwise_corr(data=df, method='spearman', tail='one-sided',
                  export_filename='test_export.csv')

    # Subset of columns, then invalid arguments.
    pairwise_corr(data=df, columns=['Neuroticism', 'Extraversion'])
    with pytest.raises(ValueError):
        pairwise_corr(data=df, tail='wrong')
    with pytest.raises(ValueError):
        pairwise_corr(data=df, columns='wrong')

    # A non-numeric column is present in the dataframe.
    df['test'] = 'test'
    pairwise_corr(data=df, method='pearson')

    # Different variations of product / combination of columns.
    n_rows = df.shape[0]
    df['Age'] = np.random.randint(18, 65, n_rows)
    df['IQ'] = np.random.normal(105, 1, n_rows)
    df['One'] = 1
    df['Gender'] = np.repeat(['M', 'F'], int(n_rows / 2))
    pairwise_corr(df, columns=['Neuroticism', 'Gender'], method='shepherd')
    pairwise_corr(df, columns=['Neuroticism', 'Extraversion', 'Gender'])
    pairwise_corr(df, columns=['Neuroticism'])
    pairwise_corr(df, columns='Neuroticism', method='skipped')
    pairwise_corr(df, columns=[['Neuroticism']], method='spearman')
    pairwise_corr(df, columns=[['Neuroticism'], None], method='percbend')
    pairwise_corr(df, columns=[['Neuroticism', 'Gender'], ['Age']])
    pairwise_corr(df, columns=[['Neuroticism'], ['Age', 'IQ']])
    pairwise_corr(df, columns=[['Age', 'IQ'], []])
    pairwise_corr(df, columns=['Age', 'Gender', 'IQ', 'Wrong'])
    pairwise_corr(df, columns=['Age', 'Gender', 'Wrong'])

    # No valid combination of columns.
    with pytest.raises(ValueError):
        pairwise_corr(df, columns=['Gender', 'Gender'])

    # One column has a single unique value.
    pairwise_corr(data=df, columns=['Age', 'One', 'Gender'])
    res = pairwise_corr(df, columns=['Neuroticism', 'IQ', 'One'])
    assert res.shape[0] == 1

    # Covariates.
    pairwise_corr(df, covar='Age')
    pairwise_corr(df, covar=['Age', 'Neuroticism'])
    with pytest.raises(AssertionError):
        pairwise_corr(df, covar=['Age', 'Gender'])
    with pytest.raises(ValueError):
        pairwise_corr(df, columns=['Neuroticism', 'Age'], covar='Age')

    # Partial pairwise correlation with missing values.
    df.loc[4, 'Age'] = np.nan
    df.loc[10, 'Neuroticism'] = np.nan
    pairwise_corr(df, covar='Age')

    ######################################################################
    # MultiIndex columns
    from numpy.random import random as rdm

    rating = ('Behavior', 'Rating')
    rt = ('Behavior', 'RT')
    bold = ('Physio', 'BOLD')
    heart_rate = ('Physio', 'HR')
    anxiety = ('Psycho', 'Anxiety')

    # Build a 10-row dataframe, then swap in the MultiIndex header.
    mi_columns = pd.MultiIndex.from_tuples(
        [rating, rt, bold, heart_rate, anxiety])
    mi_df = pd.DataFrame({
        'Rating': rdm(size=10),
        'RT': rdm(size=10),
        'BOLD': rdm(size=10),
        'HR': rdm(size=10),
        'Anxiety': rdm(size=10),
    })
    mi_df.columns = mi_columns

    pairwise_corr(mi_df, method='spearman')
    res = pairwise_corr(mi_df, columns=[rating])
    assert res.shape[0] == mi_df.shape[1] - 1
    pairwise_corr(mi_df, columns=[rating, rt])
    with_none = pairwise_corr(mi_df, columns=[[rating, rt], None])
    without_none = pairwise_corr(mi_df, columns=[[rating, rt]])
    assert with_none['X'].equals(without_none['X'])
    product = pairwise_corr(mi_df, columns=[[rating], [rt, bold]])
    assert product.shape[0] == 2

    # MultiIndex columns combined with a covariate.
    pairwise_corr(mi_df, covar=[anxiety])
    pairwise_corr(mi_df, columns=[rating], covar=[anxiety])
def test_pairwise_corr(self):
    """Exercise pairwise_corr on the Big 5 dataset and on MultiIndex data.

    The checks run sequentially on a dataframe that is mutated along the
    way (extra columns, then missing values), so statement order matters.
    """
    # JASP Big 5 dataset; the first column (subject) is dropped.
    df = read_dataset('pairwise_corr').iloc[:, 1:]
    res = pairwise_corr(data=df, method='pearson', tail='two-sided')
    expected_r = [
        -0.350, -0.01, -.134, -.368, .267,
        .055, .065, .159, -.013, .159,
    ]
    assert np.allclose(res['r'].round(3).to_numpy(), expected_r)
    assert res['n'].to_numpy()[0] == 500

    # Multiple-comparison correction.
    pairwise_corr(data=df, method='spearman', tail='one-sided',
                  padjust='bonf')

    # Subset of columns, then an invalid column specification.
    pairwise_corr(data=df, columns=['Neuroticism', 'Extraversion'])
    with pytest.raises(ValueError):
        pairwise_corr(data=df, columns='wrong')

    # A non-numeric column is present in the dataframe.
    df['test'] = 'test'
    pairwise_corr(data=df, method='pearson')

    # Different variations of product / combination of columns.
    n_rows = df.shape[0]
    df['Age'] = np.random.randint(18, 65, n_rows)
    df['IQ'] = np.random.normal(105, 1, n_rows)
    df['One'] = 1
    df['Gender'] = np.repeat(['M', 'F'], int(n_rows / 2))
    pairwise_corr(df, columns=['Neuroticism', 'Gender'], method='shepherd')
    pairwise_corr(df, columns=['Neuroticism', 'Extraversion', 'Gender'])
    pairwise_corr(df, columns=['Neuroticism'])
    pairwise_corr(df, columns='Neuroticism', method='skipped')
    pairwise_corr(df, columns=[['Neuroticism']], method='spearman')
    pairwise_corr(df, columns=[['Neuroticism'], None], method='percbend')
    pairwise_corr(df, columns=[['Neuroticism', 'Gender'], ['Age']])
    pairwise_corr(df, columns=[['Neuroticism'], ['Age', 'IQ']])
    pairwise_corr(df, columns=[['Age', 'IQ'], []])
    pairwise_corr(df, columns=['Age', 'Gender', 'IQ', 'Wrong'])
    pairwise_corr(df, columns=['Age', 'Gender', 'Wrong'])

    # No valid combination of columns.
    with pytest.raises(ValueError):
        pairwise_corr(df, columns=['Gender', 'Gender'])

    # One column has a single unique value.
    pairwise_corr(data=df, columns=['Age', 'One', 'Gender'])
    res = pairwise_corr(df, columns=['Neuroticism', 'IQ', 'One'])
    assert res.shape[0] == 1

    # Covariates.
    pairwise_corr(df, covar='Age')
    pairwise_corr(df, covar=['Age', 'Neuroticism'])
    with pytest.raises(AssertionError):
        pairwise_corr(df, covar=['Age', 'Gender'])

    # Partial pairwise correlation with overlapping covariates.
    pairwise_corr(
        df.drop(columns=['One', 'Gender']),
        covar=df.drop(columns=['test', 'One', 'Gender']).columns,
    )
    pairwise_corr(df, columns='Neuroticism', covar='Age')
    with pytest.raises(AssertionError):
        pairwise_corr(df, columns=['Neuroticism', 'Age'], covar='One')

    # Cross-check the r values against DataFrame.pcorr().
    partial_ref = df.drop(columns=['One', 'Gender']).pcorr().stack()
    pairwise_tbl = pairwise_corr(
        df.drop(columns=['One', 'Gender']),
        covar=df.drop(columns=['test', 'One', 'Gender']).columns,
    )
    pairwise_r = pairwise_tbl[['X', 'Y', 'r']].set_index(['X', 'Y']).squeeze()
    assert np.allclose(partial_ref.reindex(pairwise_r.index), pairwise_r)

    # Partial pairwise correlation with missing values.
    df.loc[[4, 5, 8, 20, 22], 'Age'] = np.nan
    df.loc[[10, 12], 'Neuroticism'] = np.nan
    pairwise_corr(df)
    pairwise_corr(df, covar='Age')

    # Listwise deletion: every pair must report the same sample size.
    listwise_covar = pairwise_corr(df, covar='Age', nan_policy='listwise')
    assert listwise_covar['n'].nunique() == 1
    listwise_plain = pairwise_corr(df, nan_policy='listwise')
    assert listwise_plain['n'].nunique() == 1

    ######################################################################
    # MultiIndex columns
    from numpy.random import random as rdm

    rating = ('Behavior', 'Rating')
    rt = ('Behavior', 'RT')
    bold = ('Physio', 'BOLD')
    heart_rate = ('Physio', 'HR')
    anxiety = ('Psycho', 'Anxiety')

    # Build a 10-row dataframe, then swap in the MultiIndex header.
    mi_columns = pd.MultiIndex.from_tuples(
        [rating, rt, bold, heart_rate, anxiety])
    mi_df = pd.DataFrame({
        'Rating': rdm(size=10),
        'RT': rdm(size=10),
        'BOLD': rdm(size=10),
        'HR': rdm(size=10),
        'Anxiety': rdm(size=10),
    })
    mi_df.columns = mi_columns

    pairwise_corr(mi_df, method='spearman')
    res = pairwise_corr(mi_df, columns=[rating])
    assert res.shape[0] == mi_df.shape[1] - 1
    pairwise_corr(mi_df, columns=[rating, rt])
    with_none = pairwise_corr(mi_df, columns=[[rating, rt], None])
    without_none = pairwise_corr(mi_df, columns=[[rating, rt]])
    assert with_none['X'].equals(without_none['X'])
    product = pairwise_corr(mi_df, columns=[[rating], [rt, bold]])
    assert product.shape[0] == 2

    # MultiIndex columns combined with covariates.
    pairwise_corr(mi_df, covar=[anxiety])
    pairwise_corr(mi_df, columns=[rating], covar=[anxiety])
    pairwise_corr(mi_df, covar=mi_df.columns)

    # Cross-check the r values against DataFrame.pcorr().
    partial_ref = mi_df.pcorr()
    partial_ref.index = partial_ref.index.tolist()  # MultiIndex to tuples
    partial_ref.columns = partial_ref.columns.tolist()  # MultiIndex to tuples
    pairwise_tbl = pairwise_corr(mi_df, covar=mi_df.columns)
    pairwise_r = pairwise_tbl[['X', 'Y', 'r']].set_index(['X', 'Y']).squeeze()
    assert np.allclose(
        partial_ref.stack().reindex(pairwise_r.index), pairwise_r)

    # Missing values in the MultiIndex dataframe.
    mi_df.iloc[2, [2, 3]] = np.nan
    mi_df.iloc[[1, 4], [1, 4]] = np.nan
    listwise_all = pairwise_corr(mi_df, nan_policy='listwise')
    assert listwise_all['n'].nunique() == 1
    pairwise_all = pairwise_corr(mi_df, nan_policy='pairwise')
    assert pairwise_all['n'].nunique() == 3
    listwise_covar = pairwise_corr(mi_df, columns=[rating], covar=[anxiety],
                                   nan_policy='listwise')
    assert listwise_covar['n'].nunique() == 1
    pairwise_covar = pairwise_corr(mi_df, columns=[rating], covar=[anxiety],
                                   nan_policy='pairwise')
    assert pairwise_covar['n'].nunique() == 2
def random_param(self):
    """Return one randomly drawn value per entry of ``self.param``.

    Each entry is unpacked as ``(default, down, up, name)``; only the
    ``down`` and ``up`` fields are used. The drawn value is
    ``rdm() * (up - down) + down``.

    NOTE(review): ``rdm`` comes from the enclosing module; presumably it
    returns a float in [0, 1), which would place each value between
    ``down`` and ``up`` -- confirm against the file's imports.
    """
    drawn = []
    for _default, low, high, _name in self.param:
        drawn.append(rdm() * (high - low) + low)
    return drawn
def generate_data(self, size=128, rdm_seed=1):
    """Generate a random feature matrix with one binary label per row.

    Stores ``size`` on ``self.dataset_size``, draws the features with
    ``rdm(rdm_seed).rand(size, self.x_dimension)`` and labels each row
    ``[1]`` when the sum of its entries is below ``self.x_dimension / 2``,
    otherwise ``[0]``.

    NOTE(review): ``rdm`` comes from the enclosing module; presumably a
    seedable generator factory exposing ``rand`` (e.g. numpy's
    ``RandomState``) -- confirm against the file's imports.

    Parameters
    ----------
    size : int
        Number of rows to generate.
    rdm_seed : int
        Value passed to ``rdm`` to create the generator.

    Returns
    -------
    tuple
        ``(X, Y)`` where ``X`` is the object returned by ``rand`` and ``Y``
        is a list holding one single-element list ``[0]`` or ``[1]`` per row.
    """
    generator = rdm(rdm_seed)
    self.dataset_size = size
    features = generator.rand(size, self.x_dimension)

    labels = []
    for row in features:
        is_below_half = sum(row) < self.x_dimension / 2
        labels.append([int(is_below_half)])
    return features, labels