# Reads 'data/skiptest.csv' with skip=4 and checks that columns x, y and z,
# concatenated in that order, are almost equal to 1..12.
# NOTE(review): the block writes 'test.csv' but then reads
# 'data/skiptest.csv' -- confirm that this is intentional.
# NOTE(review): the whitespace inside the written literal is significant (see
# the inline warning) and cannot be recovered from this collapsed view; do not
# reformat it without the original file at hand.
def test00(self): # skip 4 lines # DON'T MESS WITH THE SPACING with open('test.csv','wb') as f: f.write(""" x,y,z 1,5,9 2,6,10 3,7,11 4,8,12""") self.df=DataFrame() self.df.read_tbl('data/skiptest.csv',skip=4) D = list(self.df['x']) + \ list(self.df['y']) + \ list(self.df['z']) R=range(1,13) for (d,r) in zip(D,R): self.assertAlmostEqual(d,r)
# Writes a CSV whose header repeats the label 'x' three times, reads it with
# skip=1 and labels=True while recording warnings, and checks that the last
# recorded warning is a RuntimeWarning. The data is then read back through
# the keys 'x', 'x_2' and 'x_3' and compared against 1..12.
# NOTE(review): the bare `assert` is stripped under `python -O`, and `w[-1]`
# raises IndexError (not an assertion failure) if no warning was recorded.
# NOTE(review): line breaks inside the CSV literal are not recoverable from
# this collapsed view; keep the literal untouched.
def test03(self): # duplicate labels with open('test.csv','wb') as f: f.write(""" x,x,x 1,5,9 2,6,10 3,7,11 4,8,12""") self.df=DataFrame() with warnings.catch_warnings(record=True) as w: # Cause all warnings to always be triggered. warnings.simplefilter("always") # Trigger a warning. self.df.read_tbl('test.csv',skip=1,labels=True) assert issubclass(w[-1].category, RuntimeWarning) D = list(self.df['x']) + \ list(self.df['x_2'])+ \ list(self.df['x_3']) R=range(1,13) for (d,r) in zip(D,R): self.assertAlmostEqual(d,r)
# Writes a CSV in which one data row ("2,6") lacks its third field, reads it
# with skip=1 and labels=True while recording warnings, and checks that the
# last recorded warning is a RuntimeWarning. The expected values
# [1,3,4,5,7,8,9,11,12] contain none of the values from the short row.
# NOTE(review): the bare `assert` is stripped under `python -O`, and `w[-1]`
# raises IndexError (not an assertion failure) if no warning was recorded.
# NOTE(review): line breaks inside the CSV literal are not recoverable from
# this collapsed view; keep the literal untouched.
def test04(self): # line missing data, no comma after 6 with open('test.csv','wb') as f: f.write(""" x,y,z 1,5,9 2,6 3,7,11 4,8,12""") self.df=DataFrame() with warnings.catch_warnings(record=True) as w: # Cause all warnings to always be triggered. warnings.simplefilter("always") # Trigger a warning. self.df.read_tbl('test.csv',skip=1,labels=True) assert issubclass(w[-1].category, RuntimeWarning) D = list(self.df['x']) + \ list(self.df['y']) + \ list(self.df['z']) R=[1,3,4,5,7,8,9,11,12] for (d,r) in zip(D,R): self.assertAlmostEqual(d,r)
def setUp(self):
    """Create the fixture table used by the tests of this case.

    A fresh ``DataFrame`` is stored on ``self.df`` and filled from
    'data/words~ageXcondition.csv'. Only the CSV is loaded; no in-memory
    copy of the columns is built here, because nothing in this method
    reads one and the fixture runs before every test.
    """
    self.df = DataFrame()
    self.df.read_tbl('data/words~ageXcondition.csv')
# Loads the "_MISSING" error data set and checks that
# str(df.descriptives('ERROR')) equals the stored report text exactly.
# NOTE(review): the expected literal is layout-sensitive; its original line
# breaks and column spacing are not recoverable from this collapsed view, so
# it must not be reformatted by hand.
def test11(self): df = DataFrame() df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv') D = str(df.descriptives('ERROR')) R = """\ Descriptive Statistics ERROR ========================== count 48.000 mean 3.896 mode 3.000 var 5.797 stdev 2.408 sem 0.348 rms 4.567 min 0.000 Q1 2.000 median 3.000 Q3 5.000 max 10.000 range 10.000 95ci_lower 3.215 95ci_upper 4.577 """ self.assertEqual(D, R)
# Loads 'data/chi_test.csv', runs df.chisquare1way('RESULT') and checks that
# str() of the result equals the stored report text exactly.
# NOTE(review): the expected literal is layout-sensitive; its original line
# breaks and column spacing are not recoverable from this collapsed view, so
# it must not be reformatted by hand.
def test1(self): R = """\ Chi-Square: Single Factor SUMMARY 1 2 3 4 ============================================ Observed 7 20 23 9 Expected 14.750 14.750 14.750 14.750 CHI-SQUARE TESTS Value df P ======================================== Pearson Chi-Square 12.797 3 0.005 Likelihood Ratio 13.288 3 0.004 Observations 59 POST-HOC POWER Measure ============================== Effect size w 0.466 Non-centrality lambda 12.797 Critical Chi-Square 7.815 Power 0.865 """ df = DataFrame() df.read_tbl('data/chi_test.csv') X = df.chisquare1way('RESULT') self.assertEqual(str(X), R)
# Writes a CSV whose column labels contain spaces and whose second data row
# has an empty third field, reads it with skip=1 and labels=True, prints the
# table and the type of every 'y 3' value, then compares str() of each value
# in 'y 1', 'y 2', 'y 3' against the expected list. The expected list holds
# np.ma.core.MaskedConstant() at the position of the empty field.
# NOTE(review): spacing and line breaks inside the CSV literal are the point
# of this test and are not recoverable from this collapsed view; keep the
# literal untouched.
def test07(self): # labels have spaces with open('test.csv','wb') as f: f.write(""" y 1, y 2 , y 3 1,5,9 2,6, 3,7,11 4,8,12""") self.df=DataFrame() self.df.read_tbl('test.csv',skip=1,labels=True) print(self.df) for z in self.df['y 3']: print(type(z)) D = list(self.df['y 1']) + \ list(self.df['y 2']) + \ list(self.df['y 3']) R=[1,2,3,4,5,6,7,8,9,np.ma.core.MaskedConstant(),11,12] for (d,r) in zip(D,R): self.assertEqual(str(d),str(r))
def test1(self):
    # Round trip: evaluating repr() of a pivot result must give an object
    # whose own repr() is the same text.
    df = DataFrame()
    df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    pivoted = df.pivot('ERROR', ['TIMEOFDAY', 'MODEL'], ['COURSE'])

    rebuilt = eval(repr(pivoted))
    self.assertEqual(repr(rebuilt), repr(pivoted))
# Loads the suppression data set, runs df.ttest('SUPPRESSION', pop_mean=17.)
# and checks that str() of the result equals the stored report text exactly.
# NOTE(review): the expected literal is layout-sensitive; its original line
# breaks and column spacing are not recoverable from this collapsed view, so
# it must not be reformatted by hand.
def test1(self): R = """\ t-Test: One Sample for means SUPPRESSION ===================================== Sample Mean 19.541 Hypothesized Pop. Mean 17 Variance 228.326 Observations 384 df 383 t Stat 3.295 alpha 0.050 P(T<=t) one-tail 5.384e-04 t Critical one-tail 1.966 P(T<=t) two-tail 0.001 t Critical two-tail 1.649 P(T<=t) two-tail 0.001 Effect size d 0.168 delta 3.295 Observed power one-tail 0.950 Observed power two-tail 0.908 """ df = DataFrame() df.read_tbl('data/suppression~subjectXgroupXageXcycleXphase.csv') D = df.ttest('SUPPRESSION', pop_mean=17.) self.assertEqual(str(D), R)
def test0(self):
    # Pivot ERROR by TIMEOFDAY and COURSE, then spot-check the element at
    # index [1, 0] to five decimal places.
    df = DataFrame()
    df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    pivoted = df.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])

    expected_cell = 3.22222222222
    self.assertAlmostEqual(expected_cell, pivoted[1, 0], places=5)
def test_kn(self):
    # After overwriting column 'X' with a constant list of the same length,
    # keys() must be exactly CASE, TIME, CONDITION, X.
    df = DataFrame()
    df.read_tbl('data/example.csv')

    replacement = [23] * len(df['X'])
    df['X'] = replacement

    expected_keys = ['CASE', 'TIME', 'CONDITION', 'X']
    self.assertEqual(df.keys(), expected_keys)
def test3(self):
    """unequal

    Three columns built from range(10) plus one built from range(9) must
    make _are_col_lengths_equal() return a false value.
    """
    df = DataFrame()
    for key in (1, 2, 3):
        df[key] = range(10)
    df[4] = range(9)

    lengths_match = df._are_col_lengths_equal()
    self.assertFalse(lengths_match)
def test2(self):
    """equal non-zero

    Four columns, each built from range(10), must make
    _are_col_lengths_equal() return a true value.
    """
    df = DataFrame()
    for key in (1, 2, 3, 4):
        df[key] = range(10)

    lengths_match = df._are_col_lengths_equal()
    self.assertTrue(lengths_match)
def test05(self):
    # Computes the marginals of WORDS over AGE and CONDITION, restricted to
    # rows where AGE == "old".
    # NOTE(review): neither the stored reference text nor the computed result
    # is asserted on, so this only checks that marginals() runs without
    # raising -- confirm whether a comparison was intended.
    expected_repr = """Marginals([('factorials', OrderedDict([('AGE', [u'old', u'old', u'old', u'old', u'old']), ('CONDITION', [u'adjective', u'counting', u'imagery', u'intention', u'rhyming'])])), ('dmu', [11.0, 7.0, 13.4, 12.0, 6.9000000000000004]), ('dN', [10, 10, 10, 10, 10]), ('dsem', [0.78881063774661542, 0.57735026918962573, 1.4236104336041748, 1.1832159566199232, 0.67412494720522276]), ('dlower', [9.4539311500166345, 5.868393472388334, 10.609723550135818, 9.6808967250249509, 5.578715103477764]), ('dupper', [12.546068849983365, 8.131606527611666, 16.190276449864182, 14.319103274975049, 8.2212848965222367])], val='WORDS', factors=['AGE', 'CONDITION'], where='AGE == "old"')"""

    df = DataFrame()
    df.read_tbl('data/words~ageXcondition.csv')
    result = df.marginals('WORDS',
                          factors=['AGE', 'CONDITION'],
                          where='AGE == "old"')
def test3(self):
    # validate() on a DataFrame with no data must raise, with this exact
    # message.
    df = DataFrame()
    criteria = {'GROUP': lambda value: value in ['AA', 'AB', 'LAB']}

    with self.assertRaises(Exception) as caught:
        df.validate(criteria)

    message = str(caught.exception)
    self.assertEqual(message, 'table must have data to validate data')
def test02(self):
    # repr() of the WORDS histogram must equal the stored text exactly.
    df = DataFrame()
    df.read_tbl('data/words~ageXcondition.csv')
    actual = repr(df.histogram('WORDS'))

    # Two adjacent literals are joined at compile time into one string.
    expected = (
        "Histogram([('values', [4.0, 14.0, 17.0, 12.0, 15.0, 10.0, 9.0, 5.0, 6.0, 8.0]), "
        "('bin_edges', [3, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23])], cname='WORDS')"
    )
    self.assertEqual(actual, expected)
def test3(self):
    # sort() on a DataFrame with no data must raise, with this exact message.
    empty = DataFrame()

    with self.assertRaises(Exception) as caught:
        empty.sort()

    message = str(caught.exception)
    self.assertEqual(message, 'Table must have data to sort data')
def test12(self):
    # Re-assigning an existing key with values of another type must keep a
    # single key and record 'text' for it in _sqltypesdict.
    df = DataFrame()
    only_key = ['DUM']

    df['DUM'] = range(48)  # Shouldn't complain
    self.assertEqual(df.keys(), only_key)

    df['DUM'] = ['A'] * 48  # Shouldn't complain
    self.assertEqual(df.keys(), only_key)

    sql_type = df._sqltypesdict['DUM']
    self.assertEqual(sql_type, 'text')
def test3(self):
    # box_plot() on a DataFrame with no data must raise, with this exact
    # message.
    empty = DataFrame()

    with self.assertRaises(Exception) as caught:
        empty.box_plot('a', output_dir='output')

    message = str(caught.exception)
    self.assertEqual(message, 'Table must have data to print data')
def test6(self):
    # Asking box_plot() for a key that was never assigned must raise
    # KeyError whose str() is "'c'".
    df = DataFrame()
    for key, values in (('a', [2, 5]), ('b', [2, 3])):
        df[key] = values

    with self.assertRaises(KeyError) as caught:
        df.box_plot('c', output_dir='output')

    message = str(caught.exception)
    self.assertEqual(message, "'c'")
def test4(self):
    # sort() must raise when the columns differ in length (1 item vs. 2).
    df = DataFrame()
    for key, values in (('a', [2]), ('b', [2, 3])):
        df[key] = values

    with self.assertRaises(Exception) as caught:
        df.sort()

    message = str(caught.exception)
    self.assertEqual(message, 'columns have unequal lengths')
def test4(self):
    # box_plot() must raise when the columns differ in length (1 item vs. 2).
    df = DataFrame()
    for key, values in (('a', [2]), ('b', [2, 3])):
        df[key] = values

    with self.assertRaises(Exception) as caught:
        df.box_plot('a', output_dir='output')

    message = str(caught.exception)
    self.assertEqual(message, 'columns have unequal lengths')
def test5(self):
    # Passing an int as the positional argument of sort() must raise with
    # the "not iterable" message.
    df = DataFrame()
    for key, values in (('a', [2, 5]), ('b', [2, 3])):
        df[key] = values

    with self.assertRaises(Exception) as caught:
        df.sort(42)

    message = str(caught.exception)
    self.assertEqual(message, "'int' object is not iterable")
def test5(self):
    # Passing an int as the second positional argument of box_plot() must
    # raise with the "not iterable" message.
    df = DataFrame()
    for key, values in (('a', [2, 5]), ('b', [2, 3])):
        df[key] = values

    with self.assertRaises(Exception) as caught:
        df.box_plot('a', 42, output_dir='output')

    message = str(caught.exception)
    self.assertEqual(message, "'int' object is not iterable")
def test0(self):
    # attach() must reject an argument that is not a DataFrame (here a str).
    self.df1 = DataFrame()
    self.df1.read_tbl('data/words~ageXcondition.csv')

    with self.assertRaises(Exception) as caught:
        self.df1.attach('s')

    message = str(caught.exception)
    self.assertEqual(message, 'second argument must be a DataFrame')
# Between-subjects case: loads 'data/words~ageXcondition.csv', runs
# Anova().run(df, 'WORDS', bfactors=['AGE', 'CONDITION']) and checks that
# str(aov) equals the stored report text exactly.
# NOTE(review): the expected literal is layout-sensitive; its original line
# breaks and column spacing are not recoverable from this collapsed view, so
# it must not be reformatted by hand.
def test2(self): ## Between-Subjects test R = """WORDS ~ AGE * CONDITION TESTS OF BETWEEN-SUBJECTS EFFECTS Measure: WORDS Source Type III df MS F Sig. et2_G Obs. SE 95% CI lambda Obs. SS Power =============================================================================================================== AGE 240.250 1 240.250 29.936 3.981e-07 0.250 50 0.406 0.796 16.631 0.981 CONDITION 1514.940 4 378.735 47.191 2.530e-21 0.677 20 0.642 1.258 41.948 1.000 AGE * CONDITION 190.300 4 47.575 5.928 2.793e-04 0.209 10 0.908 1.780 2.635 0.207 Error 722.300 90 8.026 =============================================================================================================== Total 2667.790 99 TABLES OF ESTIMATED MARGINAL MEANS Estimated Marginal Means for AGE AGE Mean Std. Error 95% Lower Bound 95% Upper Bound =============================================================== old 10.060 0.567 8.949 11.171 young 13.160 0.818 11.556 14.764 Estimated Marginal Means for CONDITION CONDITION Mean Std. Error 95% Lower Bound 95% Upper Bound =================================================================== adjective 12.900 0.791 11.350 14.450 counting 6.750 0.362 6.041 7.459 imagery 15.500 0.933 13.671 17.329 intention 15.650 1.096 13.502 17.798 rhyming 7.250 0.452 6.363 8.137 Estimated Marginal Means for AGE * CONDITION AGE CONDITION Mean Std. 
Error 95% Lower Bound 95% Upper Bound =========================================================================== old adjective 11 0.789 9.454 12.546 old counting 7 0.577 5.868 8.132 old imagery 13.400 1.424 10.610 16.190 old intention 12 1.183 9.681 14.319 old rhyming 6.900 0.674 5.579 8.221 young adjective 14.800 1.104 12.637 16.963 young counting 6.500 0.453 5.611 7.389 young imagery 17.600 0.819 15.994 19.206 young intention 19.300 0.844 17.646 20.954 young rhyming 7.600 0.618 6.388 8.812 """ df = DataFrame() fname = 'data/words~ageXcondition.csv' df.read_tbl(fname) aov = Anova() aov.run(df, 'WORDS', bfactors=['AGE', 'CONDITION']) self.assertEqual(str(aov), R)
# Loads the "_MISSING" error data set and checks that str(df) equals the
# stored table text exactly.
# NOTE(review): the expected literal is layout-sensitive; its original line
# breaks and column spacing are not recoverable from this collapsed view, so
# it must not be reformatted by hand.
def test0(self): R = """SUBJECT TIMEOFDAY COURSE MODEL ERROR ============================================ 1 T1 C1 M1 10 1 T1 C1 M2 8 1 T1 C1 M3 6 1 T1 C2 M1 9 1 T1 C3 M1 7 1 T1 C3 M2 6 1 T1 C3 M3 3 1 T2 C1 M1 5 1 T2 C1 M2 4 1 T2 C1 M3 3 1 T2 C2 M1 4 1 T2 C2 M2 3 1 T2 C2 M3 3 1 T2 C3 M1 2 1 T2 C3 M2 2 1 T2 C3 M3 1 2 T1 C2 M1 10 2 T1 C2 M2 6 2 T1 C2 M3 4 2 T1 C3 M1 4 2 T1 C3 M2 5 2 T1 C3 M3 2 2 T2 C1 M1 4 2 T2 C1 M2 3 2 T2 C1 M3 3 2 T2 C2 M1 4 2 T2 C2 M2 2 2 T2 C2 M3 2 2 T2 C3 M1 2 2 T2 C3 M2 3 2 T2 C3 M3 2 3 T1 C1 M1 8 3 T1 C1 M2 7 3 T1 C1 M3 4 3 T1 C2 M1 7 3 T1 C2 M3 3 3 T1 C3 M1 3 3 T1 C3 M2 4 3 T1 C3 M3 2 3 T2 C1 M1 4 3 T2 C1 M2 1 3 T2 C1 M3 2 3 T2 C2 M1 3 3 T2 C2 M2 3 3 T2 C2 M3 2 3 T2 C3 M1 1 3 T2 C3 M2 0 3 T2 C3 M3 1 """ df = DataFrame() df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv') self.assertEqual(str(df), R)
# Inserts six rows (each id/Name/Year/member combination appears twice, with
# rep 1 and rep 2), pivots 'id' with rows=['member'] and
# cols=['Name','Year'] using aggregate='tolist' and method='full', and checks
# that str() of the pivot equals the stored table text exactly. The expected
# text shows [None, None] for combinations that were never inserted.
# NOTE(review): the expected literal is layout-sensitive; its original line
# breaks and column spacing are not recoverable from this collapsed view, so
# it must not be reformatted by hand.
def test4(self): """method='full', aggregate=tolist, invalid col""" R = """\ tolist(id) member Name=name1, Name=name1, Name=name2, Name=name2, Year=2010 Year=2011 Year=2010 Year=2011 ================================================================== N [None, None] [1.0, 1.0] [None, None] [None, None] Y [0.0, 0.0] [None, None] [None, None] [2.0, 2.0] """ df = DataFrame() df.insert({'id':0,'Name':'name1','Year':2010,'member':'Y','rep':1}) df.insert({'id':1,'Name':'name1','Year':2011,'member':'N','rep':1}) df.insert({'id':2,'Name':'name2','Year':2011,'member':'Y','rep':1}) df.insert({'id':0,'Name':'name1','Year':2010,'member':'Y','rep':2}) df.insert({'id':1,'Name':'name1','Year':2011,'member':'N','rep':2}) df.insert({'id':2,'Name':'name2','Year':2011,'member':'Y','rep':2}) my_pivot = df.pivot('id',rows = ['member'], cols = ['Name','Year'], aggregate='tolist', method='full') ## print(my_pivot) self.assertEqual(R,str(my_pivot))
# Inserts six rows (each id/Name/Year/member combination appears twice, with
# rep 1 and rep 2), pivots 'id' with rows=['Name','Year'] and
# cols=['member'] using aggregate='tolist' and method='full', and checks that
# str() of the pivot equals the stored table text exactly. The expected text
# shows [None, None] for combinations that were never inserted.
# NOTE(review): the expected literal is layout-sensitive; its original line
# breaks and column spacing are not recoverable from this collapsed view, so
# it must not be reformatted by hand.
def test3(self): """method='full', aggregate=tolist, invalid row""" R = """\ tolist(id) Name Year member=N member=Y ========================================== name1 2010 [None, None] [0.0, 0.0] name1 2011 [1.0, 1.0] [None, None] name2 2010 [None, None] [None, None] name2 2011 [None, None] [2.0, 2.0] """ df = DataFrame() df.insert({'id':0,'Name':'name1','Year':2010,'member':'Y','rep':1}) df.insert({'id':1,'Name':'name1','Year':2011,'member':'N','rep':1}) df.insert({'id':2,'Name':'name2','Year':2011,'member':'Y','rep':1}) df.insert({'id':0,'Name':'name1','Year':2010,'member':'Y','rep':2}) df.insert({'id':1,'Name':'name1','Year':2011,'member':'N','rep':2}) df.insert({'id':2,'Name':'name2','Year':2011,'member':'Y','rep':2}) my_pivot = df.pivot('id',rows = ['Name','Year'], cols = ['member'], aggregate='tolist', method='full') ## print(my_pivot) self.assertEqual(R,str(my_pivot))
def test4(self):
    # Passing an int as the second argument of _build_sqlite3_tbl() must
    # raise TypeError with the "not iterable" message.
    df = DataFrame()
    df[1] = range(100)
    df[2] = ['bob'] * 100
    df[3] = [i * 1.234232 for i in range(100)]
    # Mixed column: fifty strings followed by the integers 0..49. The
    # list + range() concatenation relies on range() returning a list.
    df[4] = ['bob'] * 50 + range(50)

    with self.assertRaises(TypeError) as caught:
        df._build_sqlite3_tbl(df.keys()[:2], 42)

    message = str(caught.exception)
    self.assertEqual(message, "'int' object is not iterable")