def test1(self):
    """Spearman correlation report for columns A, B and C matches the reference text."""
    # NOTE(review): literal kept verbatim from the reviewed source; confirm its line breaks and padding.
    expected = """Bivariate Correlations A B C ====================================================== A spearman 1 0.958 -0.924 Sig (2-tailed) . 9.699e-12 2.259e-09 N 21 21 21 ------------------------------------------------------ B spearman 0.958 1 -0.890 Sig (2-tailed) 9.699e-12 . 0.000 N 21 21 21 ------------------------------------------------------ C spearman -0.924 -0.890 1 Sig (2-tailed) 2.259e-09 0.000 . N 21 21 21 Larzelere and Mulaik Significance Testing Pair i Correlation P alpha/(k-i+1) Sig. ============================================================ A vs. B 1 0.958 9.699e-12 0.017 ** A vs. C 2 0.924 2.259e-09 0.025 ** B vs. C 3 0.890 6.850e-08 0.050 ** """
    columns = [
        ('A', [24, 61, 59, 46, 43, 44, 52, 43, 58, 67, 62, 57, 71, 49, 54,
               43, 53, 57, 49, 56, 33]),
        ('B', [42.93472681237495, 78.87307334936268, 75.37292628918023,
               65.49076317291956, 55.55965179772366, 56.777730638998236,
               62.19451880792437, 54.73710611356715, 72.10021832823149,
               85.94377749485642, 78.2087578930983, 72.01681829338037,
               84.27889316830063, 60.20516982367225, 65.6276497088971,
               62.36549856901088, 69.18772114281175, 67.00548667483324,
               59.042687027269466, 71.99214593063917, 45.00831155783992]),
        ('C', [-53.05540625388731, -96.33996451998567, -92.32465861908086,
               -70.90536432779966, -55.953777697739255, -74.12814626217357,
               -75.89188834814621, -64.24093256012688, -89.62208010083313,
               -87.41075066046812, -80.40932820298143, -77.99906284144805,
               -95.31607277596169, -61.672429800914486, -85.26088499198657,
               -63.4402296673869, -74.84950736563589, -85.00433219746624,
               -71.5901436929124, -76.43243666219388, -48.01082320924727]),
    ]
    frame = DataFrame()
    for label, values in columns:
        frame[label] = values
    report = frame.correlation(['A', 'B', 'C'], coefficient='spearman')
    self.assertEqual(str(report), expected)
def test02(self):
    """repr() of the WORDS histogram matches the reference string."""
    # NOTE(review): literal kept verbatim from the reviewed source; confirm the continuation inside it.
    expected = "Histogram([('values', [4.0, 14.0, 17.0, 12.0, 15.0, 10.0, 9.0, 5.0, 6.0, 8.0]), \ ('bin_edges', [3, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23])], cname='WORDS')"
    frame = DataFrame()
    frame.read_tbl('data/words~ageXcondition.csv')
    histogram = frame.histogram('WORDS')
    self.assertEqual(repr(histogram), expected)
def test01(self):
    """repr test: descriptives rebuilt from their repr match the reference values."""
    R = Descriptives([('count', 100.0),
                      ('mean', 11.61),
                      ('mode', 11.0),
                      ('var', 26.947373737373752),
                      ('stdev', 5.191085988246944),
                      ('sem', 0.5191085988246944),
                      ('rms', 12.707084638106414),
                      ('min', 3.0),
                      ('Q1', 7.0),
                      ('median', 11.0),
                      ('Q3', 15.5),
                      ('max', 23.0),
                      ('range', 20.0),
                      ('95ci_lower', 10.592547146303598),
                      ('95ci_upper', 12.6274528536964)],
                     cname='WORDS')
    df = DataFrame()
    df.read_tbl('data/words~ageXcondition.csv')
    # eval() only sees the repr of an object built in this test, never external input.
    D = eval(repr(df.descriptives('WORDS')))
    for k in D.keys():
        # assertAlmostEqual replaces the deprecated failUnlessAlmostEqual alias.
        self.assertAlmostEqual(D[k], R[k])
def test_kn(self):
    """Reassigning column 'X' leaves the keys as CASE, TIME, CONDITION, X."""
    frame = DataFrame()
    frame.read_tbl('data/example.csv')
    frame['X'] = [23] * len(frame['X'])
    self.assertEqual(frame.keys(), ['CASE', 'TIME', 'CONDITION', 'X'])
def test2(self):
    """where() with an ('ERROR', '=', 10) tuple condition yields the reference rows."""
    R = DataFrame([('SUBJECT', [1, 2]),
                   ('TIMEOFDAY', [u'T1', u'T1']),
                   ('COURSE', [u'C1', u'C2']),
                   ('MODEL', [u'M1', u'M1']),
                   ('ERROR', [10, 10])])
    df = DataFrame()
    df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    df2 = df.where([('ERROR', '=', 10)])
    # Compare against the reference table; the original compared df2 with
    # itself, which can never fail.
    self.assertEqual(repr(df2), repr(R))
def test6(self):
    """where() with a COURSE '=' condition and a TIMEOFDAY 'in' condition yields the reference rows."""
    expected = DataFrame([
        ('SUBJECT', [1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3]),
        ('TIMEOFDAY', [u'T1', u'T1', u'T1', u'T2', u'T2', u'T2',
                       u'T2', u'T2', u'T2', u'T1', u'T1', u'T1',
                       u'T2', u'T2', u'T2']),
        ('COURSE', [u'C1'] * 15),
        ('MODEL', [u'M1', u'M2', u'M3'] * 5),
        ('ERROR', [10, 8, 6, 5, 4, 3, 4, 3, 3, 8, 7, 4, 4, 1, 2]),
    ])
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    conditions = [('COURSE', '=', ['C1']),
                  ('TIMEOFDAY', 'in', ["T1", "T2"])]
    selected = frame.where(conditions)
    self.assertEqual(repr(selected), repr(expected))
def test2(self):
    """One-way chi-square on RESULT with explicit expected counts renders the reference report."""
    # NOTE(review): literal kept verbatim from the reviewed source; confirm its line breaks and padding.
    expected = """\ Chi-Square: Single Factor SUMMARY 1 2 3 4 5 ===================================================== Observed 7 20 23 9 0 Expected 11.800 11.800 11.800 11.800 11.800 CHI-SQUARE TESTS Value df P ============================================ Pearson Chi-Square 30.746 4 3.450e-06 Likelihood Ratio -- -- -- Observations 59 POST-HOC POWER Measure ============================== Effect size w 0.722 Non-centrality lambda 30.746 Critical Chi-Square 9.488 Power 0.998 """
    frame = DataFrame()
    frame.read_tbl('data/chi_test.csv')
    expected_counts = {1: 11.8, 2: 11.8, 3: 11.8, 4: 11.8, 5: 11.8}
    outcome = frame.chisquare1way('RESULT', expected_counts)
    self.assertEqual(str(outcome), expected)
def test1(self):
    """box_plot of WORDS split by AGE returns the reference metadata and data in TESTMODE."""
    expected = {
        'd': [np.array([9, 8, 6, 8, 10, 4, 6, 5, 7, 7,
                        7, 9, 6, 6, 6, 11, 6, 3, 8, 7,
                        11, 13, 8, 6, 14, 11, 13, 13, 10, 11,
                        12, 11, 16, 11, 9, 23, 12, 10, 19, 11,
                        10, 19, 14, 5, 10, 11, 14, 15, 11, 11]),
              np.array([8, 6, 4, 6, 7, 6, 5, 7, 9, 7,
                        10, 7, 8, 10, 4, 7, 10, 6, 7, 7,
                        14, 11, 18, 14, 13, 22, 17, 16, 12, 11,
                        20, 16, 16, 15, 18, 16, 20, 22, 14, 19,
                        21, 19, 17, 15, 22, 16, 22, 22, 18, 21])],
        'fname': 'output\\box(WORDS~AGE).png',
        'maintitle': 'WORDS by AGE',
        'xlabels': [u'AGE = old', u'AGE = young'],
    }
    frame = DataFrame()
    frame.TESTMODE = True
    frame.read_tbl('data/words~ageXcondition.csv')
    observed = frame.box_plot('WORDS', ['AGE'], output_dir='output')

    # Metadata is compared exactly, in the same order as before.
    for key in ('fname', 'maintitle', 'xlabels'):
        self.assertEqual(observed[key], expected[key])

    # Data values are compared element by element.
    pairs = zip(np.array(observed['d']).flat, np.array(expected['d']).flat)
    for got, want in pairs:
        self.assertAlmostEqual(got, want)
def test0(self):
    """Cell [1, 0] of the ERROR pivot (rows TIMEOFDAY, columns COURSE) is about 3.22222."""
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    cell = table[1, 0]
    self.assertAlmostEqual(3.22222222222, cell, 5)
def test2(self):
    """Between-subjects ANOVA of WORDS on AGE and CONDITION runs without raising."""
    ## Between-Subjects test
    source_name = 'words~ageXcondition.csv'
    frame = DataFrame()
    frame.read_tbl(source_name)
    model = Anova()
    model.run(frame, 'WORDS', bfactors=['AGE', 'CONDITION'])
def test0(self):
    """box_plot of WORDS with no factors returns the reference metadata and data in TESTMODE."""
    expected = {
        'd': [9.0, 8.0, 6.0, 8.0, 10.0, 4.0, 6.0, 5.0, 7.0, 7.0,
              7.0, 9.0, 6.0, 6.0, 6.0, 11.0, 6.0, 3.0, 8.0, 7.0,
              11.0, 13.0, 8.0, 6.0, 14.0, 11.0, 13.0, 13.0, 10.0, 11.0,
              12.0, 11.0, 16.0, 11.0, 9.0, 23.0, 12.0, 10.0, 19.0, 11.0,
              10.0, 19.0, 14.0, 5.0, 10.0, 11.0, 14.0, 15.0, 11.0, 11.0,
              8.0, 6.0, 4.0, 6.0, 7.0, 6.0, 5.0, 7.0, 9.0, 7.0,
              10.0, 7.0, 8.0, 10.0, 4.0, 7.0, 10.0, 6.0, 7.0, 7.0,
              14.0, 11.0, 18.0, 14.0, 13.0, 22.0, 17.0, 16.0, 12.0, 11.0,
              20.0, 16.0, 16.0, 15.0, 18.0, 16.0, 20.0, 22.0, 14.0, 19.0,
              21.0, 19.0, 17.0, 15.0, 22.0, 16.0, 22.0, 22.0, 18.0, 21.0],
        'fname': 'output\\box(WORDS).png',
        'maintitle': 'WORDS',
        'val': 'WORDS',
    }
    frame = DataFrame()
    frame.TESTMODE = True
    frame.read_tbl('data/words~ageXcondition.csv')
    observed = frame.box_plot('WORDS', output_dir='output')

    for key in ('fname', 'maintitle', 'val'):
        self.assertEqual(observed[key], expected[key])

    pairs = zip(np.array(observed['d']).flat, np.array(expected['d']).flat)
    for got, want in pairs:
        self.assertAlmostEqual(got, want)
def test1(self):
    """One-sample t-test of SUPPRESSION against a population mean of 17 renders the reference report."""
    # NOTE(review): literal kept verbatim from the reviewed source; confirm its line breaks and padding.
    expected = """\ t-Test: One Sample for means SUPPRESSION ===================================== Sample Mean 19.541 Hypothesized Pop. Mean 17 Variance 228.326 Observations 384 df 383 t Stat 3.295 alpha 0.050 P(T<=t) one-tail 5.384e-04 t Critical one-tail 1.966 P(T<=t) two-tail 0.001 t Critical two-tail 1.649 P(T<=t) two-tail 0.001 Effect size d 0.168 delta 3.295 Observed power one-tail 0.950 Observed power two-tail 0.908 """
    frame = DataFrame()
    frame.read_tbl('data/suppression~subjectXgroupXageXcycleXphase.csv')
    outcome = frame.ttest('SUPPRESSION', pop_mean=17.)
    self.assertEqual(str(outcome), expected)
def test2(self):
    """A filtered pivot table survives an eval(repr(...)) round trip."""
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['MODEL', 'TIMEOFDAY'], ['COURSE'],
                        where=['SUBJECT != 1'])
    # eval() only sees the repr of the table built just above.
    rebuilt = eval(repr(table))
    self.assertEqual(repr(rebuilt), repr(table))
def test1(self):
    """One-way chi-square on RESULT with default expected counts renders the reference report."""
    # NOTE(review): literal kept verbatim from the reviewed source; confirm its line breaks and padding.
    expected = """\ Chi-Square: Single Factor SUMMARY 1 2 3 4 ============================================ Observed 7 20 23 9 Expected 14.750 14.750 14.750 14.750 CHI-SQUARE TESTS Value df P ======================================== Pearson Chi-Square 12.797 3 0.005 Likelihood Ratio 13.288 3 0.004 Observations 59 POST-HOC POWER Measure ============================== Effect size w 0.466 Non-centrality lambda 12.797 Critical Chi-Square 7.815 Power 0.865 """
    frame = DataFrame()
    frame.read_tbl('data/chi_test.csv')
    outcome = frame.chisquare1way('RESULT')
    self.assertEqual(str(outcome), expected)
def test11(self):
    """str() of the ERROR descriptives matches the reference table."""
    # NOTE(review): literal kept verbatim from the reviewed source; confirm its line breaks and padding.
    expected = """\ Descriptive Statistics ERROR ========================== count 48.000 mean 3.896 mode 3.000 var 5.797 stdev 2.408 sem 0.348 rms 4.567 min 0.000 Q1 2.000 median 3.000 Q3 5.000 max 10.000 range 10.000 95ci_lower 3.215 95ci_upper 4.577 """
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    rendered = str(frame.descriptives('ERROR'))
    self.assertEqual(rendered, expected)
def test1(self):
    """Every ERROR descriptive statistic is almost equal to its reference value."""
    R = Descriptives([('count', 48.0),
                      ('mean', 3.8958333333333335),
                      ('mode', 3.0),
                      ('var', 5.797429078014184),
                      ('stdev', 2.4077850979716158),
                      ('sem', 0.34753384361617046),
                      ('rms', 4.566636252940086),
                      ('min', 0.0),
                      ('Q1', 2.0),
                      ('median', 3.0),
                      ('Q3', 5.0),
                      ('max', 10.0),
                      ('range', 10.0),
                      ('95ci_lower', 3.2146669998456394),
                      ('95ci_upper', 4.5769996668210275)],
                     cname='ERROR')
    df = DataFrame()
    df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    D = df.descriptives('ERROR')
    for k in D.keys():
        # assertAlmostEqual replaces the deprecated failUnlessAlmostEqual alias.
        self.assertAlmostEqual(D[k], R[k])
def export_csv_pivot(request, entidad=1, ano=None):
    """Return a CSV download of one entity's consumption, pivoted by month.

    Args:
        request: Incoming request; this view does not read it.
        entidad: Primary key used to filter ``Consumo`` by ``entidad__pk``.
        ano: Year used to filter ``Consumo``. ``None`` (the default) means
            the current year, resolved on every call.

    Returns:
        An ``HttpResponse`` with content type ``text/csv`` that carries the
        pivot table as an attachment named ``export.csv``.
    """
    from collections import namedtuple

    if ano is None:
        # A default computed in the signature is evaluated once at import
        # time and goes stale when the year changes in a running process.
        ano = str(date.today().year)

    LineaDetalle = namedtuple(
        'LineaDetalle',
        [u'Año', "Mes", 'Local_o_Vehiculo', "Consumo", "Valor"])

    df = DataFrame()
    for c in Consumo.objects.filter(entidad__pk=entidad, ano=ano):
        # NOTE(review): 16 is presumably the ContentType id of Local -- confirm.
        if c.content_type.id == 16:
            modelo = Local
        else:
            modelo = Vehiculo
        denominacion = modelo.objects.get(pk=c.object_id).denominacion
        df.insert(LineaDetalle(c.ano,
                               c.mes,
                               denominacion.encode("utf-8"),
                               c.medida.denominacion.encode("utf-8"),
                               c.valor)._asdict())

    pt = df.pivot("Valor", ['Local_o_Vehiculo', 'Consumo'], ['Mes'])

    # The response object can be written to like a stream. ``content_type``
    # is used because newer Django no longer accepts ``mimetype``.
    response = HttpResponse(content_type='text/csv')
    # Force a download instead of inline rendering.
    response['Content-Disposition'] = 'attachment;filename=export.csv'
    response.write(pt)
    return response
def test3(self):
    """box_plot on an empty table raises with the 'must have data' message."""
    empty = DataFrame()
    with self.assertRaises(Exception) as caught:
        empty.box_plot('a', output_dir='output')
    message = str(caught.exception)
    self.assertEqual(message, 'Table must have data to print data')
def test1(self):
    """insert() of a plain list of scalars raises 'row must be mappable type'."""
    frame = DataFrame()
    with self.assertRaises(Exception) as caught:
        frame.insert([1, 2, 3, 4])
    message = str(caught.exception)
    self.assertEqual(message, 'row must be mappable type')
def test12(self):
    """Replacing an integer column with text keeps the key and records the 'text' sql type."""
    frame = DataFrame()

    # The first assignment shouldn't complain.
    frame['DUM'] = range(48)
    self.assertEqual(frame.keys(), ['DUM'])

    # Overwriting with strings shouldn't complain either.
    frame['DUM'] = ['A'] * 48
    self.assertEqual(frame.keys(), ['DUM'])
    self.assertEqual(frame._sqltypesdict['DUM'], 'text')
def test3(self):
    """unequal: one shorter column makes _are_col_lengths_equal() false."""
    frame = DataFrame()
    for key in (1, 2, 3):
        frame[key] = range(10)
    frame[4] = range(9)
    self.assertFalse(frame._are_col_lengths_equal())
def test0(self):
    """Within-subjects ANOVA of ERROR runs and writes its HTML report without raising."""
    ## Within test
    source_name = 'error~subjectXtimeofdayXcourseXmodel.csv'
    frame = DataFrame()
    frame.read_tbl(source_name)
    model = Anova()
    model.run(frame, 'ERROR', wfactors=['TIMEOFDAY', 'COURSE', 'MODEL'])
    # The report name is the source name with '.csv' swapped for 'RESULTS.htm'.
    report_name = source_name[:-4] + 'RESULTS.htm'
    model.output2html(report_name)
def test01(self):
    """scatter_plot of TOTVOL against FIQ reports no trend in TESTMODE."""
    frame = DataFrame()
    frame.TESTMODE = True
    frame.read_tbl('data/iqbrainsize.txt', delimiter='\t')
    observed = frame.scatter_plot('TOTVOL', 'FIQ', output_dir='output')
    self.assertEqual(None, observed['trend'])
def test1(self):
    """Cumulative histogram_plot of WORDS reports the expected output file name."""
    frame = DataFrame()
    frame.TESTMODE = True
    frame.read_tbl('data/words~ageXcondition.csv')
    observed = frame.histogram_plot('WORDS', cumulative=True,
                                    output_dir='output')
    self.assertEqual(observed['fname'],
                     'output\\hist(WORDS,cumulative=True).png')
def test2(self):
    """equal non-zero: four ten-item columns make _are_col_lengths_equal() true."""
    frame = DataFrame()
    for key in (1, 2, 3, 4):
        frame[key] = range(10)
    self.assertTrue(frame._are_col_lengths_equal())
def test3(self):
    """Mixed between/within ANOVA on rescaled SUPPRESSION runs without raising."""
    ## Mixed Between/Within test
    source_name = 'suppression~subjectXgroupXcycleXphase.csv'
    frame = DataFrame()
    frame.read_tbl(source_name)
    # Rescale the dependent variable by 0.01 before the analysis.
    frame['SUPPRESSION'] = [.01 * x for x in frame['SUPPRESSION']]
    model = Anova()
    model.run(frame, 'SUPPRESSION',
              wfactors=['CYCLE', 'PHASE'],
              bfactors=['GROUP'])
def test05(self):
    """repr() of WORDS marginals restricted to AGE == "old" matches the reference string."""
    # NOTE(review): the float digits in R look like reprs from an older
    # interpreter; refresh R if the comparison fails on formatting alone.
    R = """Marginals([('factorials', OrderedDict([('AGE', [u'old', u'old', u'old', u'old', u'old']), ('CONDITION', [u'adjective', u'counting', u'imagery', u'intention', u'rhyming'])])), ('dmu', [11.0, 7.0, 13.4, 12.0, 6.9000000000000004]), ('dN', [10, 10, 10, 10, 10]), ('dsem', [0.78881063774661542, 0.57735026918962573, 1.4236104336041748, 1.1832159566199232, 0.67412494720522276]), ('dlower', [9.4539311500166345, 5.868393472388334, 10.609723550135818, 9.6808967250249509, 5.578715103477764]), ('dupper', [12.546068849983365, 8.131606527611666, 16.190276449864182, 14.319103274975049, 8.2212848965222367])], val='WORDS', factors=['AGE', 'CONDITION'], where='AGE == "old"')"""
    df = DataFrame()
    df.read_tbl('data/words~ageXcondition.csv')
    D = df.marginals('WORDS', factors=['AGE', 'CONDITION'],
                     where='AGE == "old"')
    # The visible test built R and D but never compared them, so it could
    # not fail on a wrong result.
    self.assertEqual(repr(D), R)
def test3(self):
    """validate() on an empty table raises with the 'must have data' message."""
    empty = DataFrame()
    criteria = {'GROUP': lambda x: x in ['AA', 'AB', 'LAB']}
    with self.assertRaises(Exception) as caught:
        empty.validate(criteria)
    message = str(caught.exception)
    self.assertEqual(message, 'table must have data to validate data')
def test3(self):
    """sort() on an empty table raises with the 'must have data' message."""
    empty = DataFrame()
    with self.assertRaises(Exception) as caught:
        empty.sort()
    message = str(caught.exception)
    self.assertEqual(message, 'Table must have data to sort data')
def test3(self):
    """interaction_plot with separate x and y subplots and a where filter returns the reference data."""
    R = {'aggregate': 'ci',
         'clevels': ['I', 'II'],
         'fname': 'output\\whereGROUPnotLAB.png',
         'maintitle': 'SUPPRESSION by CYCLE * AGE * PHASE * GROUP',
         'numcols': 2,
         'numrows': 2,
         'rlevels': ['AA', 'AB'],
         'subplot_titles': ['GROUP = AA, PHASE = AA',
                            'GROUP = AA, PHASE = AA',
                            'GROUP = AB, PHASE = AB',
                            'GROUP = AB, PHASE = AB'],
         'xmaxs': [4.1500000000000004,
                   4.1500000000000004,
                   4.1500000000000004,
                   4.1500000000000004],
         'xmins': [0.84999999999999998,
                   0.84999999999999998,
                   0.84999999999999998,
                   0.84999999999999998],
         'y': [[[17.75, 22.375, 23.125, 20.25],
                [8.675, 10.225, 10.5, 9.925]],
               [[20.875, 28.125, 20.75, 24.25],
                [8.3, 10.25, 9.525, 11.1]],
               [[12.625, 23.5, 20., 15.625],
                [5.525, 8.825, 9.125, 7.75]],
               [[22.75, 41.125, 46.125, 51.75],
                [8.675, 13.1, 14.475, 12.85]]],
         'ymax': 64.8719707118471,
         'ymin': 0.0}

    # separate y plots and separate x plots
    df = DataFrame()
    df.TESTMODE = True
    # Forward slash: 'data\s...' is an invalid escape sequence and only
    # resolves as a path on Windows; other tests here already use 'data/'.
    df.read_tbl('data/suppression~subjectXgroupXageXcycleXphase.csv')
    D = df.interaction_plot('SUPPRESSION', 'CYCLE',
                            seplines='AGE',
                            sepxplots='PHASE',
                            sepyplots='GROUP', yerr='ci',
                            where=[('GROUP', 'not in', ['LAB'])],
                            fname='whereGROUPnotLAB.png',
                            output_dir='output')

    # Exact comparisons, in the same order as the original assertions.
    for key in ('aggregate', 'clevels', 'rlevels', 'numcols', 'numrows',
                'fname', 'maintitle', 'subplot_titles'):
        self.assertEqual(D[key], R[key])

    self.assertAlmostEqual(D['ymin'], R['ymin'])
    self.assertAlmostEqual(D['ymax'], R['ymax'])

    for d, r in zip(np.array(D['y']).flat, np.array(R['y']).flat):
        self.assertAlmostEqual(d, r)
def test1(self):
    """A SUPPRESSION pivot converted with to_dataframe() renders the reference table."""
    # NOTE(review): literal kept verbatim from the reviewed source; confirm its line breaks and padding.
    expected = """\ CYCLE PHASE GROUP=AA, GROUP=AA, GROUP=AB, GROUP=AB, GROUP=LAB, GROUP=LAB, AGE=old AGE=young AGE=old AGE=young AGE=old AGE=young ======================================================================================= 1 I 17.750 8.675 12.625 5.525 21.625 7.825 1 II 20.875 8.300 22.750 8.675 36.250 13.750 2 I 22.375 10.225 23.500 8.825 21.375 9.900 2 II 28.125 10.250 41.125 13.100 46.875 14.375 3 I 23.125 10.500 20.000 9.125 23.750 9.500 3 II 20.750 9.525 46.125 14.475 50.375 15.575 4 I 20.250 9.925 15.625 7.750 26.375 9.650 4 II 24.250 11.100 51.750 12.850 46.500 14.425 """
    frame = DataFrame()
    frame.read_tbl('data/suppression~subjectXgroupXageXcycleXphase.csv')
    table = frame.pivot('SUPPRESSION',
                        rows=['CYCLE', 'PHASE'],
                        cols=['GROUP', 'AGE'])
    flattened = table.to_dataframe()
    self.assertEqual(str(flattened), expected)
def test01(self):
    """scatter_plot of TOTVOL against FIQ has a 'trend' entry of None in TESTMODE."""
    table = DataFrame()
    table.TESTMODE = True
    table.read_tbl('data/iqbrainsize.txt', delimiter='\t')
    plot_info = table.scatter_plot('TOTVOL', 'FIQ', output_dir='output')
    trend = plot_info['trend']
    self.assertEqual(None, trend)
def test1(self):
    """box_plot of WORDS grouped by AGE returns the reference file name, title, labels and data."""
    old_words = np.array([
        9, 8, 6, 8, 10, 4, 6, 5, 7, 7, 7, 9, 6, 6, 6, 11, 6, 3, 8, 7,
        11, 13, 8, 6, 14, 11, 13, 13, 10, 11, 12, 11, 16, 11, 9, 23, 12,
        10, 19, 11, 10, 19, 14, 5, 10, 11, 14, 15, 11, 11
    ])
    young_words = np.array([
        8, 6, 4, 6, 7, 6, 5, 7, 9, 7, 10, 7, 8, 10, 4, 7, 10, 6, 7, 7,
        14, 11, 18, 14, 13, 22, 17, 16, 12, 11, 20, 16, 16, 15, 18, 16,
        20, 22, 14, 19, 21, 19, 17, 15, 22, 16, 22, 22, 18, 21
    ])
    expected = {
        'd': [old_words, young_words],
        'fname': 'output\\box(WORDS~AGE).png',
        'maintitle': 'WORDS by AGE',
        'xlabels': ['AGE = old', 'AGE = young']
    }

    table = DataFrame()
    table.TESTMODE = True
    table.read_tbl('data/words~ageXcondition.csv')
    plot_info = table.box_plot('WORDS', ['AGE'], output_dir='output')

    for key in ('fname', 'maintitle', 'xlabels'):
        self.assertEqual(plot_info[key], expected[key])

    got_values = np.array(plot_info['d']).flat
    want_values = np.array(expected['d']).flat
    for got, want in zip(got_values, want_values):
        self.assertAlmostEqual(got, want)
def test0(self):
    """Ungrouped box_plot of WORDS returns the reference file name, title, value name and data."""
    expected = {
        'd': [
            9.0, 8.0, 6.0, 8.0, 10.0, 4.0, 6.0, 5.0, 7.0, 7.0, 7.0, 9.0,
            6.0, 6.0, 6.0, 11.0, 6.0, 3.0, 8.0, 7.0, 11.0, 13.0, 8.0, 6.0,
            14.0, 11.0, 13.0, 13.0, 10.0, 11.0, 12.0, 11.0, 16.0, 11.0,
            9.0, 23.0, 12.0, 10.0, 19.0, 11.0, 10.0, 19.0, 14.0, 5.0, 10.0,
            11.0, 14.0, 15.0, 11.0, 11.0, 8.0, 6.0, 4.0, 6.0, 7.0, 6.0,
            5.0, 7.0, 9.0, 7.0, 10.0, 7.0, 8.0, 10.0, 4.0, 7.0, 10.0, 6.0,
            7.0, 7.0, 14.0, 11.0, 18.0, 14.0, 13.0, 22.0, 17.0, 16.0, 12.0,
            11.0, 20.0, 16.0, 16.0, 15.0, 18.0, 16.0, 20.0, 22.0, 14.0,
            19.0, 21.0, 19.0, 17.0, 15.0, 22.0, 16.0, 22.0, 22.0, 18.0,
            21.0
        ],
        'fname': 'output\\box(WORDS).png',
        'maintitle': 'WORDS',
        'val': 'WORDS'
    }

    table = DataFrame()
    table.TESTMODE = True
    table.read_tbl('data/words~ageXcondition.csv')
    plot_info = table.box_plot('WORDS', output_dir='output')

    for key in ('fname', 'maintitle', 'val'):
        self.assertEqual(plot_info[key], expected[key])

    got_values = np.array(plot_info['d']).flat
    want_values = np.array(expected['d']).flat
    for got, want in zip(got_values, want_values):
        self.assertAlmostEqual(got, want)
def test2(self):
    """sort(['B', 'A']) orders rows by B and then by A within each B value."""
    expected = {
        'A': [
            -8.0, -7.0, -3.0, -2.0, -1.0, 1.0, 2.0, 3.0, 4.0, 9.0,
            -10.0, -9.0, -6.0, -5.0, -4.0, 0.0, 5.0, 6.0, 7.0, 8.0
        ],
        'B': [1.0] * 10 + [2.0] * 10
    }
    a_values = [
        4, 8, 1, 5, -7, -5, 9, 7, -8, -10,
        -1, -4, 3, 0., -2, 6, 2, -9, -3, -6
    ]
    b_values = [1, 2] * 10

    table = DataFrame()
    for a_item, b_item in zip(a_values, b_values):
        table.insert({'A': a_item, 'B': b_item})

    table.sort(['B', 'A'])

    # Column A is checked first, then column B, as in the original.
    for column in ('A', 'B'):
        for got, want in zip(table[column], expected[column]):
            self.assertAlmostEqual(got, want)
class Test_writeTable(unittest.TestCase):
    """Checks that DataFrame.write() produces a file comparable to the source CSV."""

    def setUp(self):
        """Load the suppression data set into a fresh table."""
        self.df = DataFrame()
        self.df.read_tbl('data/suppression~subjectXgroupXageXcycleXphase.csv')

    def _assert_written_matches_source(self):
        """Compare the written file with the source CSV, then remove the written file."""
        d = 'data/suppression~subjectXgroupXageXcycleXphase.csv'
        r = 'subjectXsexXageXgroupXcycleXphaseXsuppressionXranddata.csv'
        try:
            self.assertTrue(fcmp(d, r))
        finally:
            # Clean up even when the comparison fails, so a failed run does
            # not leave the generated file behind for the next test.
            os.remove(
                './subjectXsexXageXgroupXcycleXphaseXsuppressionXranddata.csv')

    def test0(self):
        """write() with no arguments."""
        self.df.write()
        self._assert_written_matches_source()

    def test1(self):
        """write() with a where clause that excludes AGE == 'young'."""
        # with exclusion
        self.df.write(where=[('AGE', 'not in', ['young'])])
        self._assert_written_matches_source()
def test06(self):
    """read_tbl() handles column labels that contain spaces."""
    # NOTE(review): both literals are kept verbatim from the reviewed source; confirm their line breaks and padding.
    R = """\ y 1 y 2 y 3 =============== 1 5 9 2 6 -- 3 7 11 4 8 12 """

    # labels have spaces
    with open('test.csv', 'wb') as f:
        f.write(""" y 1,y 2,y 3 1,5,9 2,6, 3,7,11 4,8,12""")

    self.df = DataFrame()
    self.df.read_tbl('test.csv', skip=1, labels=True)
    # assertEqual, not assertAlmostEqual: on a mismatch the latter tries to
    # subtract two strings and raises TypeError instead of reporting a failure.
    self.assertEqual(str(self.df), R)
def test05(self):
    """read_tbl() handles an empty cell left by a trailing comma."""
    # NOTE(review): both literals are kept verbatim from the reviewed source; confirm their line breaks and padding.
    R = """\ x y z ========== 1 5 9 2 6 -- 3 7 11 4 8 12 """

    # cell has empty string, comma after 6
    with open('test.csv', 'wb') as f:
        f.write(""" x,y,z 1,5,9 2,6, 3,7,11 4,8,12""")

    self.df = DataFrame()
    self.df.read_tbl('test.csv', skip=1, labels=True)
    # assertEqual, not assertAlmostEqual: on a mismatch the latter tries to
    # subtract two strings and raises TypeError instead of reporting a failure.
    self.assertEqual(str(self.df), R)
def test4(self):
    """validate() given a bare callable raises 'criteria must be mappable type'."""
    table = DataFrame()
    table.insert([('GROUP', 'AA'), ('VAL', 1)])

    with self.assertRaises(Exception) as caught:
        table.validate(lambda x: x in ['AA', 'AB', 'LAB'])

    message = str(caught.exception)
    self.assertEqual(message, 'criteria must be mappable type')
def test1(self):
    """where() with the string condition 'ERROR = 10' yields the reference rows."""
    expected = DataFrame([
        ('SUBJECT', [1, 2]),
        ('TIMEOFDAY', ['T1', 'T1']),
        ('COURSE', ['C1', 'C2']),
        ('MODEL', ['M1', 'M1']),
        ('ERROR', [10, 10]),
    ])

    table = DataFrame()
    table.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    selected = table.where(['ERROR = 10'])

    self.assertEqual(repr(selected), repr(expected))
def test1(self):
    """histogram_plot with cumulative=True names its output file accordingly."""
    table = DataFrame()
    table.TESTMODE = True
    table.read_tbl('data/words~ageXcondition.csv')
    plot_info = table.histogram_plot('WORDS', cumulative=True,
                                     output_dir='output')
    expected_name = 'output\\hist(WORDS,cumulative=True).png'
    self.assertEqual(plot_info['fname'], expected_name)
def test3(self):
    """A table whose column names contain punctuation renders the reference text."""
    # NOTE(review): literal kept verbatim from the reviewed source; confirm its line breaks and padding.
    expected = """c b{L@^hsa aj} a(1%32@) =========================== 1 a 34 2 b 34 3 c 42 4 d 34 5 e 45 6 f 34 """
    rows = [
        (34, 'a', 1),
        (34, 'b', 2),
        (42, 'c', 3),
        (34, 'd', 4),
        (45, 'e', 5),
        (34, 'f', 6),
    ]
    frame = DataFrame()
    frame.PRINTQUERIES = True
    for a_value, b_value, c_value in rows:
        # Keys are listed in the same order as the original dict literals.
        frame.insert({'a(1%32@)': a_value,
                      'b{L@^hsa aj}': b_value,
                      'c': c_value})
    self.assertEqual(expected, str(frame))
def test2(self):
    """One-way ANOVA of SUPPRESSION by GROUP renders the reference report."""
    # NOTE(review): literal kept verbatim from the reviewed source; confirm its line breaks and padding.
    expected = """Anova: Single Factor on SUPPRESSION SUMMARY Groups Count Sum Average Variance ============================================== AA 128 2048 16 148.792 AB 128 2510.600 19.614 250.326 LAB 128 2945.000 23.008 264.699 ANOVA Source of SS df MS F P-value Variation =========================================================== Treatments 3144.039 2 1572.020 7.104 9.348e-04 Error 84304.687 381 221.272 =========================================================== Total 87448.726 383 """
    frame = DataFrame()
    frame.read_tbl('suppression~subjectXgroupXageXcycleXphase.csv')
    outcome = frame.anova1way('SUPPRESSION', 'GROUP')
    self.assertEqual(str(outcome), expected)
def test3(self):
    """insert() of a row with an extra key raises the 'same keys' error."""
    table = DataFrame()
    table.insert({'A': 1, 'B': 2})

    wider_row = {'A': 1, 'B': 2, 'C': 3}
    with self.assertRaises(Exception) as caught:
        table.insert(wider_row)

    message = str(caught.exception)
    self.assertEqual(message, 'row must have the same keys as the table')
def test2(self):
    """interaction_plot with separate lines per AGE, y-subplots per PHASE and 'ci' error bars returns the reference data."""
    R = {
        'aggregate': 'ci',
        'clevels': [1],
        'fname':
        'output\\interaction_plot(SUPPRESSION~CYCLE_X_AGE_X_PHASE,yerr=95% ci).png',
        'maintitle': 'SUPPRESSION by CYCLE * AGE * PHASE',
        'numcols': 1,
        'numrows': 2,
        'rlevels': ['I', 'II'],
        'subplot_titles': ['I', 'II'],
        'xmaxs': [4.1749999999999998, 4.1749999999999998],
        'xmins': [0.32499999999999996, 0.32499999999999996],
        'y': [[[17.33333333, 22.41666667, 22.29166667, 20.75],
               [7.34166667, 9.65, 9.70833333, 9.10833333]],
              [[26.625, 38.70833333, 39.08333333, 40.83333333],
               [10.24166667, 12.575, 13.19166667, 12.79166667]]],
        'yerr': [[1.81325589, 1.44901936, 1.60883063, 1.57118871],
                 [2.49411239, 1.34873573, 1.95209851, 1.35412572]],
        'ymax': 64.8719707118471,
        'ymin': 0.0
    }

    # generate yerr
    df = DataFrame()
    df.TESTMODE = True
    # Forward slash: 'data\s...' is an invalid escape sequence and only
    # resolves as a path on Windows; other tests here already use 'data/'.
    df.read_tbl('data/suppression~subjectXgroupXageXcycleXphase.csv')
    D = df.interaction_plot('SUPPRESSION', 'CYCLE',
                            seplines='AGE',
                            sepyplots='PHASE',
                            yerr='ci',
                            output_dir='output')

    # Exact comparisons, in the same order as the original assertions.
    for key in ('aggregate', 'clevels', 'rlevels', 'numcols', 'numrows',
                'fname', 'maintitle', 'subplot_titles'):
        self.assertEqual(D[key], R[key])

    self.assertAlmostEqual(D['ymin'], R['ymin'])
    self.assertAlmostEqual(D['ymax'], R['ymax'])

    for d, r in zip(np.array(D['y']).flat, np.array(R['y']).flat):
        self.assertAlmostEqual(d, r)

    for d, r in zip(np.array(D['yerr']).flat, np.array(R['yerr']).flat):
        self.assertAlmostEqual(d, r)
def test0(self):
    """Two-way chi-square of FAULTS by VERDICT renders the reference report."""
    # NOTE(review): literal kept verbatim from the reviewed source; confirm its line breaks and padding.
    expected = """Chi-Square: two Factor SUMMARY Guilty NotGuilty Total ===================================== High 105 76 181 (130.441) (50.559) Low 153 24 177 (127.559) (49.441) ===================================== Total 258 100 358 SYMMETRIC MEASURES Value Approx. Sig. =========================================== Cramer's V 0.317 8.686e-10 Contingency Coefficient 0.302 5.510e-09 N of Valid Cases 358 CHI-SQUARE TESTS Value df P =============================================== Pearson Chi-Square 35.930 1 2.053e-09 Continuity Correction 34.532 1 4.201e-09 Likelihood Ratio 37.351 1 0 N of Valid Cases 358 """
    table = DataFrame()

    # FAULTS: expand the counter into one label per case, then reverse in place.
    table['FAULTS'] = list(Counter(Low=177, High=181).elements())
    table['FAULTS'].reverse()

    # VERDICT: the first counter's cases, followed by the second counter's.
    table['VERDICT'] = list(Counter(Guilty=153, NotGuilty=24).elements())
    second_group = list(Counter(Guilty=105, NotGuilty=76).elements())
    table['VERDICT'].extend(second_group)

    outcome = table.chisquare2way('FAULTS', 'VERDICT')
    self.assertEqual(str(outcome), expected)
def test02(self):
    """str() of the WORDS descriptives matches the reference table."""
    # NOTE(review): literal kept verbatim from the reviewed source; confirm its line breaks and padding.
    expected = """\ Descriptive Statistics WORDS ========================== count 100.000 mean 11.610 mode 11.000 var 26.947 stdev 5.191 sem 0.519 rms 12.707 min 3.000 Q1 7.000 median 11.000 Q3 15.500 max 23.000 range 20.000 95ci_lower 10.593 95ci_upper 12.627 """
    table = DataFrame()
    table.read_tbl('data/words~ageXcondition.csv')
    rendered = str(table.descriptives('WORDS'))
    self.assertEqual(rendered, expected)
def test5(self):
    """Each item yielded by iterating the transposed ERROR pivot renders its reference text."""
    # NOTE(review): literals kept verbatim from the reviewed source; confirm their line breaks and padding.
    R = [
        """\ avg(ERROR) COURSE TIMEOFDAY=T1 TIMEOFDAY=T2 ==================================== C1 7.167 3.222 """,
        """\ avg(ERROR) COURSE TIMEOFDAY=T1 TIMEOFDAY=T2 ==================================== C2 6.500 2.889 """,
        """\ avg(ERROR) COURSE TIMEOFDAY=T1 TIMEOFDAY=T2 ==================================== C3 4 1.556 """
    ]
    df = DataFrame()
    df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    pt = df.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])

    for r, L in zip(R, pt.transpose()):
        # assertEqual, not assertAlmostEqual: on a mismatch the latter tries
        # to subtract two strings and raises TypeError instead of a failure.
        self.assertEqual(r, str(L))
def test1(self):
    """interaction_plot with a numeric yerr of 1.0 and x-subplots per MODEL returns the reference data."""
    expected = {
        'aggregate': None,
        'clevels': ['M1', 'M2', 'M3'],
        'fname':
        'output\\interaction_plot(ERROR~TIMEOFDAY_X_COURSE_X_MODEL,yerr=1.0).png',
        'maintitle': 'ERROR by TIMEOFDAY * COURSE * MODEL',
        'numcols': 3,
        'numrows': 1,
        'rlevels': [1],
        'subplot_titles': ['M1', 'M2', 'M3'],
        'xmaxs': [1.5, 1.5, 1.5],
        'xmins': [-0.5, -0.5, -0.5],
        'y': [[[9., 4.33333333], [8.66666667, 3.66666667],
               [4.66666667, 1.66666667]],
              [[7.5, 2.66666667], [6., 2.66666667], [5., 1.66666667]],
              [[5., 2.66666667], [3.5, 2.33333333],
               [2.33333333, 1.33333333]]],
        'yerr': [[1.0, 1.0], [1.0, 1.0], [1.0, 1.0]],
        'ymax': 11.119188627248182,
        'ymin': 0.0
    }

    # specify yerr
    table = DataFrame()
    table.TESTMODE = True
    table.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    plot_info = table.interaction_plot('ERROR', 'TIMEOFDAY',
                                       seplines='COURSE',
                                       sepxplots='MODEL',
                                       yerr=1.,
                                       output_dir='output')

    exact_keys = ('aggregate', 'clevels', 'rlevels', 'numcols', 'numrows',
                  'fname', 'maintitle', 'subplot_titles')
    for key in exact_keys:
        self.assertEqual(plot_info[key], expected[key])

    for key in ('ymin', 'ymax'):
        self.assertAlmostEqual(plot_info[key], expected[key])

    for key in ('y', 'yerr'):
        got_values = np.array(plot_info[key]).flat
        want_values = np.array(expected[key]).flat
        for got, want in zip(got_values, want_values):
            self.assertAlmostEqual(got, want)
def test2(self):
    """chisquare1way on RESULT with five explicit expected counts renders the reference report."""
    # NOTE(review): literal kept verbatim from the reviewed source; confirm its line breaks and padding.
    expected = """\ Chi-Square: Single Factor SUMMARY 1 2 3 4 5 ===================================================== Observed 7 20 23 9 0 Expected 11.800 11.800 11.800 11.800 11.800 CHI-SQUARE TESTS Value df P ============================================ Pearson Chi-Square 30.746 4 3.450e-06 Likelihood Ratio -- -- -- Observations 59 POST-HOC POWER Measure ============================== Effect size w 0.722 Non-centrality lambda 30.746 Critical Chi-Square 9.488 Power 0.998 """
    table = DataFrame()
    table.read_tbl('data/chi_test.csv')
    expected_counts = {1: 11.8, 2: 11.8, 3: 11.8, 4: 11.8, 5: 11.8}
    outcome = table.chisquare1way('RESULT', expected_counts)
    self.assertEqual(str(outcome), expected)
def test4(self):
    """Paired t-test of PRE against POST renders the reference report."""
    # NOTE(review): literal kept verbatim from the reviewed source; confirm its line breaks and padding.
    expected = """t-Test: Paired Two Sample for means PRE POST ============================================= Mean 87.250 87.083 Variance 1207.659 1166.629 Observations 12 12 Pearson Correlation 0.995 df 11 t Stat 0.163 alpha 0.050 P(T<=t) one-tail 0.437 t Critical one-tail 2.201 P(T<=t) two-tail 0.873 t Critical two-tail 1.796 P(T<=t) two-tail 0.873 Effect size dz 0.047 delta 0.163 Observed power one-tail 0.068 Observed power two-tail 0.035 """
    table = DataFrame()
    table.read_tbl('data/example2_prepost.csv')
    outcome = table.ttest('PRE', 'POST', paired=True)
    self.assertEqual(str(outcome), expected)
def test3(self):
    """Tuples used as column names come back unchanged from keys()."""
    tupa = ('a1', 'a2', 'a3')
    tupb = ('a1', 'b2', 'b3')

    frame = DataFrame()
    for a_value, b_value in ((34, 1), (34, 2), (42, 3)):
        frame.insert([(('a1', 'a2', 'a3'), a_value),
                      (('a1', 'b2', 'b3'), b_value)])

    first_key, second_key = frame.keys()
    self.assertEqual(first_key, tupa)
    self.assertEqual(second_key, tupb)
def test01(self):
    """confidence interval error bars specified"""
    expected = {
        'aggregate': 'ci',
        'clevels': [1],
        'fname':
        'output\\interaction_plot(WORDS~AGE_X_CONDITION,yerr=95% ci).png',
        'maintitle': 'WORDS by AGE * CONDITION',
        'numcols': 1,
        'numrows': 1,
        'rlevels': [1],
        'subplot_titles': [''],
        'xmaxs': [1.5],
        'xmins': [-0.5],
        'y': [[[11.0, 14.8], [7.0, 6.5], [13.4, 17.6], [12.0, 19.3],
               [6.9, 7.6]]],
        'yerr': [[]],
        'ymin': 0.0,
        'ymax': 27.183257964740832
    }

    # a simple plot
    table = DataFrame()
    table.TESTMODE = True
    table.read_tbl('data/words~ageXcondition.csv')
    plot_info = table.interaction_plot('WORDS', 'AGE',
                                       seplines='CONDITION',
                                       output_dir='output',
                                       yerr='ci')

    exact_keys = ('aggregate', 'clevels', 'rlevels', 'numcols', 'numrows',
                  'fname', 'maintitle', 'subplot_titles')
    for key in exact_keys:
        self.assertEqual(plot_info[key], expected[key])

    for key in ('ymin', 'ymax'):
        self.assertAlmostEqual(plot_info[key], expected[key])

    for key in ('y', 'yerr'):
        got_values = np.array(plot_info[key]).flat
        want_values = np.array(expected[key]).flat
        for got, want in zip(got_values, want_values):
            self.assertAlmostEqual(got, want)
def test1(self):
    """chi-square 2-way"""
    # NOTE(review): literal kept verbatim from the reviewed source; confirm its line breaks and padding.
    expected = """\ Chi-Square: two Factor SUMMARY Litter Removed Trash Can Total ==================================================== Countrol 385 477 41 903 (343.976) (497.363) (61.661) Message 290 499 80 869 (331.024) (478.637) (59.339) ==================================================== Total 675 976 121 1772 SYMMETRIC MEASURES Value Approx. Sig. =========================================== Cramer's V 0.121 3.510e-07 Contingency Coefficient 0.120 4.263e-07 N of Valid Cases 1772 CHI-SQUARE TESTS Value df P ============================================ Pearson Chi-Square 25.794 2 2.506e-06 Likelihood Ratio 26.056 2 2.198e-06 N of Valid Cases 1772 CHI-SQUARE POST-HOC POWER Measure ============================== Effect size w 0.121 Non-centrality lambda 25.794 Critical Chi-Square 5.991 Power 0.997 """
    # The table is constructed but not used afterwards, as in the original.
    df = DataFrame()

    row_labels = ['Countrol'] * 903 + ['Message'] * 869
    col_labels = ['Trash Can'] * 41 + ['Litter'] * 385 + ['Removed'] * 477
    col_labels += ['Trash Can'] * 80 + ['Litter'] * 290 + ['Removed'] * 499

    analysis = ChiSquare2way()
    analysis.run(row_labels, col_labels)
    self.assertEqual(str(analysis), expected)
def test4(self):
    """interaction_plot with x-subplots per CONDITION and no error bars returns the reference data."""
    conditions = ['adjective', 'counting', 'imagery', 'intention', 'rhyming']
    expected = {
        'aggregate': None,
        'clevels': ['adjective', 'counting', 'imagery', 'intention',
                    'rhyming'],
        'fname': 'output\\interaction_plot(WORDS~AGE_X_CONDITION).png',
        'maintitle': 'WORDS by AGE * CONDITION',
        'numcols': 5,
        'numrows': 1,
        'rlevels': [1],
        'subplot_titles': conditions,
        'xmaxs': [1.5, 1.5, 1.5, 1.5, 1.5],
        'xmins': [-0.5, -0.5, -0.5, -0.5, -0.5],
        'y': [[11., 14.8], [7., 6.5], [13.4, 17.6], [12., 19.3],
              [6.9, 7.6]],
        'yerr': [[], [], [], [], []],
        'ymax': 27.183257964740832,
        'ymin': 0.0
    }

    # a simple plot
    table = DataFrame()
    table.TESTMODE = True
    table.read_tbl('data/words~ageXcondition.csv')
    plot_info = table.interaction_plot('WORDS', 'AGE',
                                       sepxplots='CONDITION',
                                       output_dir='output')

    exact_keys = ('aggregate', 'clevels', 'rlevels', 'numcols', 'numrows',
                  'fname', 'maintitle', 'subplot_titles')
    for key in exact_keys:
        self.assertEqual(plot_info[key], expected[key])

    for key in ('ymin', 'ymax'):
        self.assertAlmostEqual(plot_info[key], expected[key])

    for key in ('y', 'yerr'):
        got_values = np.array(plot_info[key]).flat
        want_values = np.array(expected[key]).flat
        for got, want in zip(got_values, want_values):
            self.assertAlmostEqual(got, want)
def test6(self):
    """Compare interaction_plot with yerr='ci' and sepyplots='PHASE' with stored values.

    The frame is put in TESTMODE before plotting and the mapping returned by
    interaction_plot is checked key by key against ``R``.
    """
    # 'xmaxs' and 'xmins' are recorded here but are not asserted below.
    R = {
        'aggregate': 'ci',
        'clevels': [1],
        'fname': 'output\\interaction_plot(SUPPRESSION~CYCLE_X_PHASE,yerr=95% ci).png',
        'maintitle': 'SUPPRESSION by CYCLE * PHASE',
        'numcols': 1,
        'numrows': 2,
        'rlevels': ['I', 'II'],
        'subplot_titles': ['I', 'II'],
        'xmaxs': [4.1749999999999998, 4.1749999999999998],
        'xmins': [0.82499999999999996, 0.82499999999999996],
        'y': [[12.3375, 16.03333333, 16., 14.92916667],
              [18.43333333, 25.64166667, 26.1375, 26.8125]],
        'yerr': [[3.18994762, 3.20528834, 3.26882751, 3.53477953],
                 [3.98429064, 4.5950803, 4.9514978, 4.97429769]],
        'ymax': 64.8719707118471,
        'ymin': 0.0,
    }

    # generate yerr
    df = DataFrame()
    df.TESTMODE = True
    # The backslash is escaped explicitly: '\s' is not a recognised escape
    # sequence and triggers a SyntaxWarning on current Python versions. The
    # string passed to read_tbl is unchanged.
    df.read_tbl('data\\suppression~subjectXgroupXageXcycleXphase.csv')
    D = df.interaction_plot('SUPPRESSION', 'CYCLE',
                            sepyplots='PHASE',
                            yerr='ci',
                            output_dir='output')

    # exact matches: layout, labels and output file name
    for key in ('aggregate', 'clevels', 'rlevels', 'numcols', 'numrows',
                'fname', 'maintitle', 'subplot_titles'):
        self.assertEqual(D[key], R[key])

    # scalar axis limits are floats, so compare approximately
    for key in ('ymin', 'ymax'):
        self.assertAlmostEqual(D[key], R[key])

    # plotted values and error bars: flatten both sides and compare pairwise
    for key in ('y', 'yerr'):
        for d, r in zip(np.array(D[key]).flat, np.array(R[key]).flat):
            self.assertAlmostEqual(d, r)
def test31(self):
    """Smoke-test interaction_plot with seplines, sepxplots and sepyplots together.

    Nothing is asserted about the result; the test fails only if one of the
    calls raises.
    """
    # separate y plots and separate x plots
    df = DataFrame()
    df.TESTMODE = True
    # The backslash is escaped explicitly: '\s' is not a recognised escape
    # sequence and triggers a SyntaxWarning on current Python versions. The
    # string passed to read_tbl is unchanged.
    df.read_tbl('data\\suppression~subjectXgroupXageXcycleXphase.csv')
    # The return value is intentionally discarded (it was never inspected).
    df.interaction_plot('SUPPRESSION', 'CYCLE',
                        seplines='AGE',
                        sepxplots='GROUP',
                        sepyplots='PHASE',
                        yerr='sem',
                        output_dir='output')
def test22(self):
    """Build the sqlite table from string-keyed columns using a string where clause.

    Columns '1'..'3' are shuffled in place; column '4' holds 'bob' in its first
    50 entries and integers afterwards. Every row read back from TBL is compared
    with the entries of columns '1' and '2' from index 50 onwards.
    """
    frame = DataFrame()
    frame['1'] = list(range(100))
    frame['2'] = ['bob'] * 100
    frame['3'] = [k * 1.234232 for k in range(100)]
    frame['4'] = ['bob'] * 50 + list(range(50))

    # shuffle in the same column order as always: '1', '2', '3'
    for key in ('1', '2', '3'):
        shuffle(frame[key])

    selected = list(frame.keys())[:2]
    frame._build_sqlite3_tbl(selected, ['4 not in ("bob")'])
    frame._execute('select * from TBL')

    # rows are matched against the frame starting at index 50
    for idx, (first, second) in enumerate(frame.cur, start=50):
        self.assertEqual(first, frame['1'][idx])
        self.assertEqual(second, frame['2'][idx])
def test3(self):
    """Build the sqlite table from integer-keyed columns using a tuple where condition.

    Columns 1..3 are shuffled in place; column 4 holds 'bob' in its first 50
    entries and integers afterwards. Every row read back from TBL is compared
    with the entries of columns 1 and 2 from index 50 onwards.
    """
    frame = DataFrame()
    frame[1] = list(range(100))
    frame[2] = ['bob'] * 100
    frame[3] = [k * 1.234232 for k in range(100)]
    frame[4] = ['bob'] * 50 + list(range(50))

    # shuffle in the same column order as always: 1, 2, 3
    for key in (1, 2, 3):
        shuffle(frame[key])

    selected = list(frame.keys())[:2]
    frame._build_sqlite3_tbl(selected, [(4, '!=', 'bob')])
    frame._execute('select * from TBL')

    # rows are matched against the frame starting at index 50
    for idx, (first, second) in enumerate(frame.cur, start=50):
        self.assertEqual(first, frame[1][idx])
        self.assertEqual(second, frame[2][idx])
def long2wide(in_fname, id, dvs, between=[], within=[], covariates=[],
              out_fname=None, nested=True):
    """Pivot a long-format table so each ``id`` becomes one row, and write it as CSV.

    Args:
        in_fname: path handed to ``DataFrame.read_tbl``.
        id: name of the column whose sorted unique values form the first
            output column; it is also used as ``cols=[id]`` in every pivot.
        dvs: iterable of dependent-variable column names; each is pivoted
            with ``aggregate='avg'``.
        between: list of column names pivoted with ``aggregate='arbitrary'``.
        within: list of factor names; combinations of these are used as the
            ``rows`` of each dependent-variable pivot.
        covariates: list of column names pivoted with
            ``aggregate='arbitrary'``; they precede ``between`` in the header.
        out_fname: output CSV path; ``'wide_data.csv'`` when ``None``.
        nested: when true, factor combinations of every size from 1 up to
            ``len(within)`` are pivoted; otherwise only size ``len(within)``.

    Returns:
        None. The result is written to ``out_fname``; progress is printed.

    Note:
        The list defaults are only read here, never mutated.
    """
    # load in_fname into a DataFrame object
    print(('reading "%s"...' % in_fname))
    cls = DataFrame()
    cls.read_tbl(in_fname)

    # d is a list of output columns; the first one holds the sorted unique ids
    d = [sorted(set(cls[id]))]
    header = [id] + covariates + between
    for col in covariates + between:
        z = cls.pivot(col, cols=[id], aggregate='arbitrary')
        d.extend(list(z))

    # start controls whether nested factors are examined
    start = 1 if nested else len(within)

    # loop through DVs and append within columns
    for dv in dvs:
        print(('\ncollaborating %s' % dv))
        for j in _xrange(start, len(within) + 1):
            for factors in _xunique_combinations(within, j):
                # prints the tuple itself, as the original call did
                print((' pivoting', factors, '...'))
                z = cls.pivot(dv, rows=factors, cols=[id], aggregate='avg')
                d.extend(list(z))

                # process headers: one label per pivot row, e.g. DV__F.level_G.level
                for names in z.rnames:
                    h = '_'.join('%s.%s' % (f, str(c)) for (f, c) in names)
                    header.append('%s__%s' % (dv, h))

    # Now we can write the data
    if out_fname is None:
        out_fname = 'wide_data.csv'

    # csv.writer emits str, so the file must be opened in text mode; binary
    # mode ('wb') raises TypeError on Python 3. newline='' lets the csv module
    # control line endings itself.
    with open(out_fname, 'w', newline='') as f:
        wtr = csv.writer(f)
        wtr.writerow([n.upper() for n in header])
        wtr.writerows(zip(*d))  # transpose columns into rows and write