forked from tomstafford/axongame
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ps_fig3.py
90 lines (68 loc) · 2.95 KB
/
ps_fig3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#modules----------------------------------------
import pickle
import scipy.stats.mstats as ssm
import numpy as np
import matplotlib as plt
from pylab import *
import scipy.stats
loading=1
if not loading:
#first find actual data -------------------------
print "OBSERVED DATA (fig4_observed.py)"
execfile("ps_fig3obs.py")
#second generate bootstrap data -----------------
print "BOOTSTRAP CONFIDENCE LIMITS"
execfile("ps_fig3boot.py") #this can take a long time (e.g. 24 hours) if you use many (e.g. 2000) resamples
else:
print "LOADING DATA"
#load
#observed data
plot_timespread = pickle.load(open('save_plot_timespread.p', 'rb'))
#bootstrap data
bootdata = pickle.load(open('save_a4_2boot_bootdata.p','rb'))
print "finding CIs"
#find CIs, using ssm
ci_upper=np.zeros( (1,100))
ci_lower=np.zeros( (1,100))
m_boot=np.zeros( (1,100))
for i in range(100):
ci_upper[0,i]=ssm.scoreatpercentile(bootdata[i,:],97.5)
ci_lower[0,i]=ssm.scoreatpercentile(bootdata[i,:],02.5)
m_boot[0,i]=np.mean(bootdata[i,:])
print "running t-test"
#make the same shape
expt=a=np.reshape(m_boot[(0,)],100) #expected values (from bootstrap, ie under H0)
obsv=a=np.reshape(plot_timespread,100) #observed values
#recode so that a positive difference supports the theory (ie that spacing helps)
#for the bottom 50% this means their observed is lower than expected
#for the top 50% this means their obseved is higher than expected
diffs=np.concatenate([expt[0:50]-obsv[0:50],obsv[50:100]-expt[50:100]])
print "mean difference in difference from zero (+ve means more spacing among higher scorers, less among low)= %6.2f" % mean(diffs)
print 't-statistic = %6.3f pvalue = %6.4f' % scipy.stats.ttest_1samp(diffs,0)
print "PLOTTING"
# plot -------------------------------------------
# thank you tomas http://www.staff.ncl.ac.uk/tom.holderness/software/pythonlinearfit
plt.clf()
# plot sample data
plot(plot_timespread,'ro',label='sample means')
# plot line of best fit
plot(m_boot[(0,)],'b-',label='bootstrap mean')
# plot confidence limits
plot(ci_lower[(0,)],'b--',label='confidence limits (95%)')
plot(ci_upper[(0,)],'b--')
# configure legend
legend(loc=4) #lower left http://matplotlib.org/users/legend_guide.html
leg = gca().get_legend()
ltext = leg.get_texts()
setp(ltext, fontsize=10)
xlabel('percentile according to maximum score')
ylabel('average gap between first and last plays/hours')
savefig('StaffordFig3.png', dpi=None, facecolor='w', edgecolor='w',
orientation='portrait', papertype=None, format=None,
transparent=False, bbox_inches=None, pad_inches=0.1)
generatepaperfigs=1
if generatepaperfigs:
savefig('../psychscience/StaffordFig3.pdf', dpi=300, facecolor='w', edgecolor='w',
orientation='portrait', papertype=None, format=None,
transparent=False, bbox_inches=None, pad_inches=0.1)
print "done!"