Пример #1
0
def _nice_intervals(data, nlevs):
    '''
    Purpose::
        Build "nice" color levels for colorbars and contour plots. The
        level range is bounded by the 5th and 95th percentiles of the data
        so that outliers in the tails do not stretch the color scale.

    Input::
        data - an array of data to be plotted
        nlevs - an int giving the target number of intervals

    Output::
        clevs - an array of floats holding the resulting colorbar levels
    '''
    flat = data.ravel()

    # Trimming 5% from each tail limits the influence of outliers
    lower = mstats.scoreatpercentile(flat, 5)
    upper = mstats.scoreatpercentile(flat, 95)

    ticks = mpl.ticker.MaxNLocator(nlevs).tick_values(lower, upper)

    # MaxNLocator can return ticks outside [lower, upper]; keep only the
    # ones that lie inside the trimmed data range
    inside = (ticks >= lower) & (ticks <= upper)
    return ticks[inside]
Пример #2
0
def binner(x, y, w_sta, nbins, rang = None, ebar = False, per = None) :
    """
    Bin ``y`` along ``x`` and reduce every bin to a single statistic.

    Parameters
    ----------
    x, y : numpy arrays of equal length
        Sample coordinates and values. Both are reordered by ``x``
        (ties broken by ``y``) before binning.
    w_sta : str
        Statistic computed per bin: "median", "mean" or "mode".
    nbins : int
        Number of bins; every returned array has this length along its
        last axis.
    rang : (min, max) pair, optional
        Binning range. Defaults to the extremes of ``x``.
    ebar : bool, optional
        When true, asymmetric error bars are returned as well.
    per : pair of numbers, optional
        Lower and upper percentiles used for the error bars. Defaults to
        (15.8, 84.0).

    Returns
    -------
    (x_cen, y_sta) when ``ebar`` is false, otherwise (x_cen, y_sta, yer)
    where ``yer`` stacks the lower and upper absolute deviations of the
    percentiles from ``y_sta``.

    Raises
    ------
    ValueError
        If ``w_sta`` is not one of the supported names.

    Notes
    -----
    ``mode`` and ``scoreatpercentile`` are not imported here; they must be
    available in the enclosing module (presumably from scipy.stats).
    NOTE(review): the bin centers come from ``nbins + 1`` edges while the
    samples are assigned with ``nbins`` edges, so centers and bin contents
    do not describe the same intervals. Kept as-is for compatibility;
    confirm whether this is intended.
    """
    from numpy import array, digitize, lexsort, linspace
    from numpy.ma import average, median

    # Resolve the statistic up front so a typo fails with a clear error
    # instead of an unbound local further down.
    if w_sta == "median":
        stat = median
    elif w_sta == "mean":
        stat = average
    elif w_sta == "mode":
        stat = lambda values: mode(values)[0]
    else:
        raise ValueError(
            "w_sta must be 'median', 'mean' or 'mode', got %r" % (w_sta,))

    ind = lexsort((y, x))
    xs, ys = x[ind], y[ind]

    if rang is None:
        mn, mx = min(xs), max(xs)
    else:
        mn, mx = rang

    edges = linspace(mn, mx, nbins + 1)
    x_cen = (edges[:-1] + edges[1:]) * 0.5
    bins = linspace(mn, mx, nbins)
    ibins = digitize(xs, bins)

    # Collect the samples of every bin once; they are reused for the
    # statistic and for both percentile error bars.
    groups = [ys[ibins == i] for i in range(1, bins.size + 1)]
    y_sta = array([stat(group) for group in groups])

    if not ebar:
        return x_cen, y_sta

    # `is None` (not `== None`) so that an array-valued `per` works
    lo_per, hi_per = (15.8, 84.0) if per is None else (per[0], per[1])
    myer = abs(array([scoreatpercentile(group, lo_per) for group in groups]) - y_sta)
    pyer = abs(array([scoreatpercentile(group, hi_per) for group in groups]) - y_sta)
    return x_cen, y_sta, array([myer, pyer])
Пример #3
0
def _nice_intervals(data, nlevs):
    '''
    Purpose::
        Build "nice" color levels for colorbars and contour plots. The
        level range is bounded by the 5th and 95th percentiles of the data
        so that outliers in the tails do not stretch the color scale.

    Input::
        data - an array of data to be plotted
        nlevs - an int giving the target number of intervals

    Output::
        clevs - an array of floats holding the resulting colorbar levels
    '''
    flat = data.ravel()

    # Trimming 5% from each tail limits the influence of outliers
    lower = mstats.scoreatpercentile(flat, 5)
    upper = mstats.scoreatpercentile(flat, 95)

    ticks = mpl.ticker.MaxNLocator(nlevs).tick_values(lower, upper)

    # MaxNLocator can return ticks outside [lower, upper]; keep only the
    # ones that lie inside the trimmed data range
    inside = (ticks >= lower) & (ticks <= upper)
    return ticks[inside]
Пример #4
0
def nbins(sample, range_ = None) :
  """
  Suggest a histogram binning for `sample` from its interquartile range.

  The bin width is 2 * IQR / n ** (1/3), where IQR and n are taken from
  the samples lying inside [mn, mx]. `range_` supplies (mn, mx); when it
  is None the extremes of `sample` are used.

  Returns the tuple (number of bins as a float, mn, mx, bin width).
  """
  if range_ is not None :
    mn, mx = range_
  else :
    mn, mx = sample.min(), sample.max()

  inside = (sample >= mn) & (sample <= mx)
  kept   = sample[inside]

  # interquartile range of the samples within the requested range
  iqr     = st.scoreatpercentile(kept, 75.0) - st.scoreatpercentile(kept, 25.0)
  binsize = (2 * iqr / inside.sum() ** (1. / 3))

  return (mx - mn) / binsize, mn, mx, binsize
Пример #5
0
def nbins(sample, range_=None):
    """
    Suggest a histogram binning for `sample` from its interquartile range.

    The bin width is 2 * IQR / n ** (1/3), where IQR and n are taken from
    the samples lying inside [mn, mx]. `range_` supplies (mn, mx); when it
    is None the extremes of `sample` are used.

    Returns the tuple (number of bins as a float, mn, mx, bin width).
    """
    if range_ is not None:
        mn, mx = range_
    else:
        mn, mx = sample.min(), sample.max()

    inside = (sample >= mn) & (sample <= mx)
    kept = sample[inside]

    # interquartile range of the samples within the requested range
    upper_quartile = st.scoreatpercentile(kept, 75.0)
    lower_quartile = st.scoreatpercentile(kept, 25.0)
    binsize = (2 * (upper_quartile - lower_quartile) / inside.sum()**(1. / 3))

    return (mx - mn) / binsize, mn, mx, binsize
Пример #6
0
def binner(x, y, w_sta, nbins, rang=None, ebar=False, per=None):
    """
    Bin ``y`` along ``x`` and reduce every bin to a single statistic.

    Parameters
    ----------
    x, y : numpy arrays of equal length
        Sample coordinates and values. Both are reordered by ``x``
        (ties broken by ``y``) before binning.
    w_sta : str
        Statistic computed per bin: "median", "mean" or "mode".
    nbins : int
        Number of bins; every returned array has this length along its
        last axis.
    rang : (min, max) pair, optional
        Binning range. Defaults to the extremes of ``x``.
    ebar : bool, optional
        When true, asymmetric error bars are returned as well.
    per : pair of numbers, optional
        Lower and upper percentiles used for the error bars. Defaults to
        (15.8, 84.0).

    Returns
    -------
    (x_cen, y_sta) when ``ebar`` is false, otherwise (x_cen, y_sta, yer)
    where ``yer`` stacks the lower and upper absolute deviations of the
    percentiles from ``y_sta``.

    Raises
    ------
    ValueError
        If ``w_sta`` is not one of the supported names.

    Notes
    -----
    ``mode`` and ``scoreatpercentile`` are not imported here; they must be
    available in the enclosing module (presumably from scipy.stats).
    NOTE(review): the bin centers come from ``nbins + 1`` edges while the
    samples are assigned with ``nbins`` edges, so centers and bin contents
    do not describe the same intervals. Kept as-is for compatibility;
    confirm whether this is intended.
    """
    from numpy import array, digitize, lexsort, linspace
    from numpy.ma import average, median

    # Resolve the statistic up front so a typo fails with a clear error
    # instead of an unbound local further down.
    if w_sta == "median":
        stat = median
    elif w_sta == "mean":
        stat = average
    elif w_sta == "mode":
        stat = lambda values: mode(values)[0]
    else:
        raise ValueError(
            "w_sta must be 'median', 'mean' or 'mode', got %r" % (w_sta,))

    ind = lexsort((y, x))
    xs, ys = x[ind], y[ind]

    if rang is None:
        mn, mx = min(xs), max(xs)
    else:
        mn, mx = rang

    edges = linspace(mn, mx, nbins + 1)
    x_cen = (edges[:-1] + edges[1:]) * 0.5
    bins = linspace(mn, mx, nbins)
    ibins = digitize(xs, bins)

    # Collect the samples of every bin once; they are reused for the
    # statistic and for both percentile error bars.
    groups = [ys[ibins == i] for i in range(1, bins.size + 1)]
    y_sta = array([stat(group) for group in groups])

    if not ebar:
        return x_cen, y_sta

    # `is None` (not `== None`) so that an array-valued `per` works
    lo_per, hi_per = (15.8, 84.0) if per is None else (per[0], per[1])
    myer = abs(
        array([scoreatpercentile(group, lo_per) for group in groups]) - y_sta)
    pyer = abs(
        array([scoreatpercentile(group, hi_per) for group in groups]) - y_sta)
    return x_cen, y_sta, array([myer, pyer])
Пример #7
0
    def clean_outliers(self):
        """
        Mask outliers row by row using percentile bounds.

        For every row of ``self.xs`` the values at percentiles
        ``100 - self.outlier_perc`` and ``self.outlier_perc`` are computed.
        All but the last ``self.keep_n_values`` entries of the row are
        masked where they fall outside those two bounds; the last
        ``self.keep_n_values`` entries are appended unchanged. ``self.xs``
        is replaced by the resulting masked array.

        Parameters
        ----------
        self.outlier_perc : integer
            Percentile value for mstats.scoreatpercentile function.
        """
        # Per-row percentile bounds, computed from the untouched self.xs
        bounds = []
        for row in xrange(self.rows_N):
            values = self.xs[row, :]
            bounds.append([mstats.scoreatpercentile(values, 100 - self.outlier_perc),
                           mstats.scoreatpercentile(values, self.outlier_perc)])
        bounds = ma.array(bounds)

        cleaned_rows = []
        for row in xrange(self.rows_N):
            head = self.xs[row, :-self.keep_n_values]
            tail = self.xs[row, -self.keep_n_values:]
            masked_head = ma.masked_outside(head, bounds[row, 0], bounds[row, 1])
            cleaned_rows.append(ma.hstack((masked_head, tail)))
        self.xs = ma.array(cleaned_rows)
Пример #8
0
 def test_2D(self):
     # 5x3 masked array: four identical rows plus one outlying row; the
     # 50th percentile of a 2-D input is expected per column.
     rows = [[1, 1, 1] for _ in range(5)]
     rows[2] = [4, 4, 3]
     x = ma.array(rows)
     expected = [1, 1, 1]
     assert_equal(mstats.scoreatpercentile(x, 50), expected)
Пример #9
0
 def test_2D(self):
     # 5x3 masked array: four identical rows plus one outlying row; the
     # 50th percentile of a 2-D input is expected per column.
     rows = [[1, 1, 1] for _ in range(5)]
     rows[2] = [4, 4, 3]
     x = ma.array(rows)
     expected = [1, 1, 1]
     assert_equal(mstats.scoreatpercentile(x, 50), expected)
Пример #10
0
def _nice_intervals(data, nlevs):
    '''
    Purpose::
        Calculates nice intervals between each color level for colorbars
        and contour plots. The target minimum and maximum color levels are
        the 5th and 95th percentiles of the data, which cuts off the tails
        of the distribution to remove outliers. When that range straddles
        zero the limits are made symmetric about zero.

    Input::
        data - an array of data to be plotted
        nlevs - an int giving the target number of intervals

    Output::
        clevs - An array of floats for the resultant colorbar levels
    '''
    # Find the min and max levels by cutting off the tails of the distribution
    # This mitigates the influence of outliers
    data = data.ravel()
    mn = mstats.scoreatpercentile(data, 5)
    mx = mstats.scoreatpercentile(data, 95)
    # if min is below 0 and max is above 0, put 0 in the center of the
    # color bar by using symmetric limits
    if mn < 0 and mx > 0:
        level = max(abs(mn), abs(mx))
        mnlvl = -1 * level
        mxlvl = level
    # otherwise the color bar simply spans min to max
    else:
        mnlvl = mn
        mxlvl = mx

    # hack to make generated intervals from mpl the same for all versions:
    # temporarily force 'round_numbers' when the rc key exists
    autolimit_mode = mpl.rcParams.get('axes.autolimit_mode')
    if autolimit_mode:
        mpl.rc('axes', autolimit_mode='round_numbers')

    try:
        locator = mpl.ticker.MaxNLocator(nlevs)
        clevs = locator.tick_values(mnlvl, mxlvl)
    finally:
        # Restore the user's setting even if the locator raises, so the
        # global rc state is never left modified
        if autolimit_mode:
            mpl.rc('axes', autolimit_mode=autolimit_mode)

    # Make sure the bounds of clevs are reasonable since sometimes
    # MaxNLocator gives values outside the domain of the input data
    clevs = clevs[(clevs >= mnlvl) & (clevs <= mxlvl)]
    return clevs
Пример #11
0
def _nice_intervals(data, nlevs):
    '''
    Purpose::
        Calculates nice intervals between each color level for colorbars
        and contour plots. The target minimum and maximum color levels are
        the 5th and 95th percentiles of the data, which cuts off the tails
        of the distribution to remove outliers. When that range straddles
        zero the limits are made symmetric about zero.

    Input::
        data - an array of data to be plotted
        nlevs - an int giving the target number of intervals

    Output::
        clevs - An array of floats for the resultant colorbar levels
    '''
    # Find the min and max levels by cutting off the tails of the distribution
    # This mitigates the influence of outliers
    data = data.ravel()
    mn = mstats.scoreatpercentile(data, 5)
    mx = mstats.scoreatpercentile(data, 95)
    # if min is below 0 and max is above 0, put 0 in the center of the
    # color bar by using symmetric limits
    if mn < 0 and mx > 0:
        level = max(abs(mn), abs(mx))
        mnlvl = -1 * level
        mxlvl = level
    # otherwise the color bar simply spans min to max
    else:
        mnlvl = mn
        mxlvl = mx

    # hack to make generated intervals from mpl the same for all versions:
    # temporarily force 'round_numbers' when the rc key exists
    autolimit_mode = mpl.rcParams.get('axes.autolimit_mode')
    if autolimit_mode:
        mpl.rc('axes', autolimit_mode='round_numbers')

    try:
        locator = mpl.ticker.MaxNLocator(nlevs)
        clevs = locator.tick_values(mnlvl, mxlvl)
    finally:
        # Restore the user's setting even if the locator raises, so the
        # global rc state is never left modified
        if autolimit_mode:
            mpl.rc('axes', autolimit_mode=autolimit_mode)

    # Make sure the bounds of clevs are reasonable since sometimes
    # MaxNLocator gives values outside the domain of the input data
    clevs = clevs[(clevs >= mnlvl) & (clevs <= mxlvl)]
    return clevs
Пример #12
0
 def test_percentile(self):
     # Samples 0.0, 0.5, ..., 3.5: check the minimum, the maximum and the
     # interpolated median, in that order.
     x = np.arange(8) * 0.5
     for per, expected in ((0, 0.), (100, 3.5), (50, 1.75)):
         assert_equal(mstats.scoreatpercentile(x, per), expected)
Пример #13
0
#solution attempt 2 - heatmap
plt.clf()
gridsize = 20
# hexagonal-bin density plot of the two percentile rankings
plt.hexbin(xlist, ylist, gridsize=gridsize, cmap=cm.jet, bins=None)
cb = plt.colorbar()
cb.set_label('frequency')

xlabel('percentile by variation in first five plays')
ylabel('percentile by average in second five plays')
# the result of pearsonr supplies both format fields (r and p)
print("r = %.3f, p = %.5f" % pearsonr(xlist, ylist))

savefig('explore_exploit_scatterheatmap.png',
        dpi=300,
        facecolor='w',
        edgecolor='w',
        orientation='portrait',
        papertype=None,
        format=None,
        transparent=False,
        bbox_inches='tight',
        pad_inches=0.1)

#now do CI for r value

# Load through a context manager so the file handle is closed instead of
# being left to the garbage collector.
with open('save_a5_boot_bootrec.p', 'rb') as bootrec_file:
    bootrec = pickle.load(bootrec_file)
bootrec = bootrec[0]
# 95% interval of the bootstrapped values: 2.5th and 97.5th percentiles
ci_upper = ssm.scoreatpercentile(bootrec, 97.5)
ci_lower = ssm.scoreatpercentile(bootrec, 2.5)
ci_mean = np.mean(bootrec)
print("Bootstrapped confidence intervals were Upper = %0.3f, Lower = %0.3f" % (
    ci_upper, ci_lower))
Пример #14
0
# Write through a context manager so the pickle is flushed and the file
# closed before anything else tries to read it back.
with open('save_a4_1_a.p', 'wb') as dump_file:
    pickle.dump(a, dump_file)

# --------------------------------------------
#calc dict of maximum score for each player(=each key)
maxscore={}

for key in big:
    maxscore[key]= max([big[key][attempt][0] for attempt in big[key]])

# sort maximum scores, smallest to biggest
ranked_maxscore=sorted(maxscore[key] for key in maxscore)

#calc percentile ranking for each player (=each key)
# prcentiles[p] is the score at percentile p, for p = 0..99
prcentiles=[]
for p in range(100):
    prcentiles.append(ssm.scoreatpercentile(ranked_maxscore,p))


decile={}

# For each player, scan the percentile scores in ascending order and keep
# the index of the last one the player's maximum exceeds. A player whose
# maximum exceeds none of them gets no entry in `decile`.
for key in big:
    for i in prcentiles:
        if maxscore[key]>i:
            decile[key]=prcentiles.index(float(i))

#------------------------------------------------
# now calculate some index of spread
# - the simplest one is range

timespread={}
Пример #15
0
        # Draw one value per attempt of the second block of plays
        # (sample_wr presumably samples with replacement — TODO confirm)
        for attempt in second_plays:
            second.append(sample_wr(bootdata[attempt], 1))
        # mean and variance of the resampled second block for this key
        av2[key] = sp.mean(second)
        var2[key] = sp.var(second)

    #make list of summary stats
    # x: first-block variance, y: second-block average, one entry per key
    x = []
    y = []
    for key in big:
        x.append(var1[key])
        y.append(av2[key])

    #find percentile values
    # score at each integer percentile 0..99 of x
    prcentiles_x = []
    for p in range(100):
        prcentiles_x.append(ssm.scoreatpercentile(x, p))

    # score at each integer percentile 0..99 of y
    prcentiles_y = []
    for p in range(100):
        prcentiles_y.append(ssm.scoreatpercentile(y, p))

    #make dict of prcentile values for each statistic for each player
    # bisect gives the insertion position of the player's value in the
    # percentile score list, used here as the player's percentile index
    prcentile_xindex = {
        key: bisect.bisect(prcentiles_x, var1[key])
        for key in big
    }
    prcentile_yindex = {
        key: bisect.bisect(prcentiles_y, av2[key])
        for key in big
    }
Пример #16
0
        
#solution attempt 2 - heatmap
plt.clf()
gridsize=20
# hexagonal-bin density plot of the two percentile rankings
plt.hexbin(xlist, ylist,gridsize=gridsize, cmap=cm.jet, bins=None)
cb = plt.colorbar()
cb.set_label('frequency')

xlabel('percentile by variation in first five plays')
ylabel('percentile by average in second five plays')
# the result of pearsonr supplies both format fields (r and p)
print("r = %.3f, p = %.5f" % pearsonr(xlist,ylist))

savefig('Figure6.png', dpi=300, facecolor='w', edgecolor='w',
        orientation='portrait', papertype=None, format=None,
        transparent=False, bbox_inches='tight', pad_inches=0.1)

# set to 1 to also write the copy used in the paper's figure folder
generatepaperfigs=0
if generatepaperfigs:
    savefig('../cogsci13/figures/a5_e-e_heatscatter.png', dpi=300, facecolor='w', edgecolor='w',
        orientation='portrait', papertype=None, format=None,
        transparent=False, bbox_inches='tight', pad_inches=0.1)

#now do CI for r value

# Load through a context manager so the file handle is closed instead of
# being left to the garbage collector.
with open('save_a5_boot_bootrec.p', 'rb') as bootrec_file:
    bootrec=pickle.load(bootrec_file)
bootrec=bootrec[0]
# 95% interval of the bootstrapped values: 2.5th and 97.5th percentiles
ci_upper=ssm.scoreatpercentile(bootrec,97.5)
ci_lower=ssm.scoreatpercentile(bootrec,2.5)
ci_mean=np.mean(bootrec)
print("Bootstrapped confidence intervals were Upper = %0.3f, Lower = %0.3f" % (ci_upper,ci_lower))
Пример #17
0
 def test_percentile(self):
     # Samples 0.0, 0.5, ..., 3.5: check the minimum, the maximum and the
     # interpolated median, in that order.
     x = np.arange(8) * 0.5
     for per, expected in ((0, 0.), (100, 3.5), (50, 1.75)):
         assert_equal(mstats.scoreatpercentile(x, per), expected)
Пример #18
0
def drawGraphs(outFolder, bootName,  windowSizes1, windowSizes2, zBottom = -1, zTop = 1):
    """
    Report bootstrap statistics and draw 3D surfaces of the correlations.

    For every pair of window sizes the pickled x/y lists are loaded from
    ``outFolder/bootName``, their Pearson correlation is computed and
    compared with the 2.5th/97.5th percentiles of the matching bootstrap
    record. Two PDF figures (observed and bootstrap-mean correlations) are
    written to ``outFolder/figures``.

    Parameters
    ----------
    outFolder : str
        Root folder holding the bootstrap results and the ``figures`` folder.
    bootName : str
        Name of the bootstrap run; used as sub-folder and file-name prefix.
    windowSizes1, windowSizes2 : sequences
        Window sizes spanning the two axes of the result grids.
    zBottom, zTop : numbers, optional
        Limits of the z axis in both figures.
    """
    import matplotlib
    matplotlib.use('PDF')
    from matplotlib import pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D
    print('Drawing bootstrap graphs for: {}'.format(bootName))
    # Context managers close the pickle files instead of leaking handles
    with open('{}/{}/bootrec.p'.format(outFolder,bootName),'rb') as rec_file:
        bootrec = pickle.load(rec_file)
    plt.close('all')
    grid_shape = (len(windowSizes1),len(windowSizes2))
    Z_obs = np.zeros(grid_shape)
    Z_lower = np.zeros(grid_shape)
    Z_boot = np.zeros(grid_shape)
    Z_upper = np.zeros(grid_shape)
    Z_std = np.zeros(grid_shape)

    currentFolder = outFolder + '/' + bootName

    for i1, groupn_i in enumerate(windowSizes1):
        for i2, groupn_j in enumerate(windowSizes2):
            curbootrec=bootrec[0,i1,i2]
            print("")
            print("Analyzing %i - %i" % (groupn_i,groupn_j))
            suffix = str(groupn_i) + "," + str(groupn_j) +'.p'
            with open(currentFolder + '/save_a5_xlist' + suffix, 'rb') as x_file:
                xlist = pickle.load(x_file)
            with open(currentFolder + '/save_a5_ylist' + suffix, 'rb') as y_file:
                ylist = pickle.load(y_file)
            a,b = pearsonr(xlist,ylist)

            #now do CI for r value
            ci_upper=ssm.scoreatpercentile(curbootrec,97.5)
            ci_lower=ssm.scoreatpercentile(curbootrec,2.5)
            ci_mean=np.mean(curbootrec)
            # Standard deviation, not variance: it is used as the scale of
            # the normal distribution below and as the z-score denominator
            ci_std=np.std(curbootrec)
            print(scipy.stats.norm(ci_mean,ci_std).cdf(abs(a)))
            print("r = %.3f, p = %.5f, %s of confidence interval" % (a,b, 'outside' if a > ci_upper or a < ci_lower else 'inside'))
            print("Bootstrapped confidence intervals were Upper = %0.3f, Lower = %0.3f" % (ci_upper,ci_lower))

            Z_obs[i1][i2] = a
            Z_upper[i1][i2] = ci_upper
            Z_lower[i1][i2] = ci_lower
            Z_boot[i1][i2] = ci_mean
            Z_std[i1][i2] = ci_std


    # Coordinate grids matching the (windowSizes1, windowSizes2) layout
    X = [[k for j in windowSizes2] for k in windowSizes1]
    Y = [[j for j in windowSizes2] for k in windowSizes1]
    #One-sided Z value to p value
    # The inner index runs over windowSizes2 so the grid is fully covered
    # even when the two window lists differ in length
    Z_p = [[st.norm.sf((Z_obs[i][j] - Z_boot[i][j])/Z_std[i][j]) for j in range(len(windowSizes2))] for i in range(len(windowSizes1))]

    fig1 = plt.figure()

    fontsize = 16
    ax = fig1.add_subplot(111, projection='3d')
    ax.plot_surface(X, Y, Z_obs, rstride=1, cstride=1)
    fig1.suptitle('Observed Correlations', fontsize=20)
    ax.set_xlabel('Size window 1', fontsize = fontsize)
    ax.set_ylabel('Size window 2', fontsize = fontsize)
    ax.set_zlabel('r', fontsize = fontsize)
    ax.set_zlim(bottom = zBottom, top = zTop)

    plt.savefig('{}/figures/{}_corObs.pdf'.format(outFolder,bootName), bbox_inches='tight')

    fig2 = plt.figure()

    ax = fig2.add_subplot(111, projection='3d')
    ax.plot_surface(X, Y, Z_boot, rstride=1, cstride=1)
    fig2.suptitle('Bootstrap Average Correlations', fontsize=20)
    ax.set_xlabel('Size window 1', fontsize = fontsize)
    ax.set_ylabel('Size window 2', fontsize = fontsize)
    ax.set_zlabel('r', fontsize = fontsize)
    ax.set_zlim(bottom = zBottom, top = zTop)

    plt.savefig('{}/figures/{}_corBoot.pdf'.format(outFolder,bootName), bbox_inches='tight')
Пример #19
0
# Residuals for five quantities, one per pair of adjacent columns
# (0-1, 2-3, ..., 8-9) of `table`. The first pair uses rt.err's default
# third argument; the remaining pairs pass False.
# NOTE(review): the meaning of that flag is defined in rt.err, not here.
res = [rt.err(table[:, i], table[:, i + 1]) if i == 0 else rt.err(table[:, i], table[:, i + 1], False) for i in xrange(0, 10, 2)]
# x-axis labels, in the same order as the entries of `res`
lab = ["mass residuals", "mass weighted age residuals", "flux weighted age residuals", "metallicity residuals", "dust extinction residuals"]

# One figure per residual type: a 5 x 13 grid of histograms sharing both axes
for j in xrange(len(res)) :
  fig, axs = plt.subplots(5, 13, sharex = True, sharey = True, figsize = (20, 15))

  plt.xlim(-1.5, +1.5)
  plt.ylim(0, 40)

  # Flatten the grid so that panels can be addressed with a single index
  axs = np.ravel(axs)

  for i in xrange(65) :
    # Panel i shows the i-th consecutive run of 100 residuals
    data   = res[j][i * 100:(i + 1) * 100]
    median = np.median(data)
    p16    = st.scoreatpercentile(data, 16.0)
    p84    = st.scoreatpercentile(data, 84.0)

    counts, bins, patches = axs[i].hist(data, 30, histtype = "step", hatch = "///", lw = 1, color = "#1A1A1A", range = (-1.5, +1.5))

    # Dashed line at the median, dash-dotted lines at the 16th/84th percentiles
    axs[i].axvline(median, ls = "--", lw = 1.5, color = "#000080")
    axs[i].axvline(p16, ls = "-.", lw = 1.5, color = "#000080")
    axs[i].axvline(p84, ls = "-.", lw = 1.5, color = "#000080")

  axs[-1].set_xticks([-1., 0, +1.])
  # `i` still holds its last loop value (64) here; the first and last
  # y ticks of that panel are dropped
  axs[52].set_yticks(list(axs[i].get_yticks()[1:-1]))
  axs[58].set_xlabel(lab[j], fontsize = 16)
  axs[26].set_ylabel("counts", fontsize = 16)

  plt.tight_layout()
  plt.subplots_adjust(wspace = 0.01, hspace = 0.01, bottom = 0.06)
Пример #20
0

# --------------------------------------------
# best score reached by every player (players are the keys of `big`)
maxscore={}

for key in big:
    attempts=big[key]
    maxscore[key]=max([attempts[attempt][0] for attempt in attempts])

# all best scores in ascending order
ranked_maxscore=sorted(maxscore.values())

# score found at each integer percentile 0..99 of the best scores
prcentiles=[ssm.scoreatpercentile(ranked_maxscore,p) for p in range(100)]


#decile={}
#    
#for key in big:
#    for i in prcentiles:
#        if maxscore[key]>i:
#            decile[key]=prcentiles.index(float(i))

# at this point every player's skill level is known

# containers for the per-player average and variance computed next

av1={}
var1={}
Пример #21
0
# keep only the entries of `data` holding more than nine items
big = {k: data[k] for k in data if len(data[k]) > 9}

# --------------------------------------------
# best score reached by every player (players are the keys of `big`)
maxscore = {}

for key in big:
    attempts = big[key]
    maxscore[key] = max([attempts[attempt][0] for attempt in attempts])

# all best scores in ascending order
ranked_maxscore = sorted(maxscore.values())

# score found at each integer percentile 0..99 of the best scores
prcentiles = [ssm.scoreatpercentile(ranked_maxscore, p) for p in range(100)]

#decile={}
#
#for key in big:
#    for i in prcentiles:
#        if maxscore[key]>i:
#            decile[key]=prcentiles.index(float(i))

# at this point every player's skill level is known

# containers for the per-player averages and variances computed next

av1 = {}
var1 = {}
av2 = {}
Пример #22
0
execfile("fig4_boot.py") #this can take a long time (e.g. 24 hours) if you use many (e.g. 2000) resamples

#load
# Context managers close the pickle files instead of leaking the handles.
#observed data
with open('save_plot_timespread.p', 'rb') as timespread_file:
    plot_timespread = pickle.load(timespread_file)
#bootstrap data
with open('save_a4_2boot_bootdata.p','rb') as bootdata_file:
    bootdata = pickle.load(bootdata_file)

#find CIs, using ssm

ci_upper=np.zeros( (1,100))
ci_lower=np.zeros( (1,100))
m_boot=np.zeros( (1,100))

# For each of the 100 rows of bootdata: 2.5th/97.5th percentiles and the mean
for i in range(100):
    ci_upper[0,i]=ssm.scoreatpercentile(bootdata[i,:],97.5)
    ci_lower[0,i]=ssm.scoreatpercentile(bootdata[i,:],2.5)
    m_boot[0,i]=np.mean(bootdata[i,:])

print("PLOTTING")

# plot -------------------------------------------
# thank you tomas http://www.staff.ncl.ac.uk/tom.holderness/software/pythonlinearfit
plt.clf()

# plot sample data
plot(plot_timespread,'ro',label='Sample observations')

# plot the bootstrap mean (first and only row of m_boot)
plot(m_boot[(0,)],'b-',label='bootstrap_mean')
Пример #23
0
# 100 rows by boot_n columns; it is filled outside the visible part of the loop
bootdata = np.zeros((100, boot_n))

print "Starting bootstrap calculations"
for n in range(boot_n):

    print "iteration " + str(n) + " of " + str(boot_n)
    #find maxscores, when actual scores are a sample [attempts] long of a
    #maxscore_boot={key: max(random.sample(a,len(big[key]))) for key in big}
    # each key gets the maximum of len(big[key]) values drawn from `a`
    # (sample_wr presumably samples with replacement — TODO confirm)
    maxscore_boot = {key: max(sample_wr(a, len(big[key]))) for key in big}

    # sort maximum scores, smallest to biggest, put in list
    ranked_maxscore_boot = sorted(maxscore_boot[key] for key in maxscore_boot)

    #calculate percentiles on these bootstrapped maximum scores
    # one score per integer percentile 0..99
    prcentiles_boot = [
        ssm.scoreatpercentile(ranked_maxscore_boot, p) for p in range(100)
    ]

    #assign prcentile to key in decile_boot
    # bisect gives the insertion position of the key's maximum in the
    # percentile score list
    decile_boot = {
        key: bisect.bisect(prcentiles_boot, maxscore_boot[key])
        for key in big
    }

    #now calculate timespread to score percentile, using these
    #bootstrapped maximum scores
    spreads_b = np.zeros((100, 1))  #holding var for the time
    counts_b = np.zeros((100, 1))  #holding var for the number of players' data

    #sort timespread into holding variables according to decile value
    for key in decile_boot:
Пример #24
0
    #load
    #observed data
    plot_timespread = pickle.load(open('save_plot_timespread.p', 'rb'))
    #bootstrap data
    bootdata = pickle.load(open('save_a4_2boot_bootdata.p', 'rb'))

print "finding CIs"

#find CIs, using ssm

ci_upper = np.zeros((1, 100))
ci_lower = np.zeros((1, 100))
m_boot = np.zeros((1, 100))

for i in range(100):
    ci_upper[0, i] = ssm.scoreatpercentile(bootdata[i, :], 97.5)
    ci_lower[0, i] = ssm.scoreatpercentile(bootdata[i, :], 02.5)
    m_boot[0, i] = np.mean(bootdata[i, :])

print "running t-test"

#make the same shape
expt = a = np.reshape(m_boot[(0, )],
                      100)  #expected values (from bootstrap, ie under H0)
obsv = a = np.reshape(plot_timespread, 100)  #observed values

#recode so that a positive difference supports the theory (ie that spacing helps)
#for the bottom 50% this means their observed is lower than expected
#for the top 50% this means their obseved is higher than expected
diffs = np.concatenate([expt[0:50] - obsv[0:50], obsv[50:100] - expt[50:100]])
Пример #25
0
    "flux weighted age residuals", "metallicity residuals",
    "dust extinction residuals"
]

# One figure per entry of `res`: a 5 x 13 grid of histograms sharing both axes
for j in xrange(len(res)):
    fig, axs = plt.subplots(5, 13, sharex=True, sharey=True, figsize=(20, 15))

    plt.xlim(-1.5, +1.5)
    plt.ylim(0, 40)

    # Flatten the grid so that panels can be addressed with a single index
    axs = np.ravel(axs)

    for i in xrange(65):
        # Panel i shows the i-th consecutive run of 100 values of res[j]
        data = res[j][i * 100:(i + 1) * 100]
        median = np.median(data)
        p16 = st.scoreatpercentile(data, 16.0)
        p84 = st.scoreatpercentile(data, 84.0)

        counts, bins, patches = axs[i].hist(data,
                                            30,
                                            histtype="step",
                                            hatch="///",
                                            lw=1,
                                            color="#1A1A1A",
                                            range=(-1.5, +1.5))

        # Dashed line at the median, dash-dotted lines at the 16th/84th
        # percentiles
        axs[i].axvline(median, ls="--", lw=1.5, color="#000080")
        axs[i].axvline(p16, ls="-.", lw=1.5, color="#000080")
        axs[i].axvline(p84, ls="-.", lw=1.5, color="#000080")

    axs[-1].set_xticks([-1., 0, +1.])
Пример #26
0
boot_n = 2000  # number of resamples drawn by the bootstrap
bootdata = np.zeros((100, boot_n))

print("Starting bootstrap calculations")
for n in range(boot_n):

    print("iteration " + str(n) + " of " + str(boot_n))
    # bootstrapped best score per key: the maximum of as many values drawn
    # from `a` as the key has entries in `big`
    # maxscore_boot={key: max(random.sample(a,len(big[key]))) for key in big}
    maxscore_boot = {}
    for key in big:
        maxscore_boot[key] = max(sample_wr(a, len(big[key])))

    # bootstrapped best scores in ascending order
    ranked_maxscore_boot = sorted(maxscore_boot.values())

    # score at each integer percentile 0..99 of the bootstrapped best scores
    prcentiles_boot = []
    for p in range(100):
        prcentiles_boot.append(ssm.scoreatpercentile(ranked_maxscore_boot, p))

    # position of every key's bootstrapped best score in the percentile list
    decile_boot = dict(
        (key, bisect.bisect(prcentiles_boot, maxscore_boot[key]))
        for key in big)

    # accumulators per percentile slot: summed timespread and number of
    # contributing keys
    spreads_b = np.zeros((100, 1))
    counts_b = np.zeros((100, 1))

    # add every key's timespread to the slot given by its percentile position
    for key in decile_boot:
        slot = decile_boot[key] - 1
        spreads_b[slot] += timespread[key]
        counts_b[slot] += 1

    t = spreads_b / counts_b  # average timespread per slot
Пример #27
0
            # one value drawn per attempt (sample_wr presumably samples
            # with replacement — TODO confirm)
            second.append(sample_wr(bootdata[attempt],1))
        # mean and variance of the resampled second block for this key
        av2[key]=sp.mean(second)
        var2[key]=sp.var(second)


    #make list of summary stats
    # x: first-block variance, y: second-block average, one entry per key
    x=[]
    y=[]
    for key in big:
        x.append(var1[key])
        y.append(av2[key])

    #find percentile values
    # score at each integer percentile 0..99 of x
    prcentiles_x=[]
    for p in range(100):
        prcentiles_x.append(ssm.scoreatpercentile(x,p))

    # score at each integer percentile 0..99 of y
    prcentiles_y=[]
    for p in range(100):
        prcentiles_y.append(ssm.scoreatpercentile(y,p))


    #make dict of prcentile values for each statistic for each player
    # bisect gives the insertion position of the player's value in the
    # percentile score list, used here as the player's percentile index
    prcentile_xindex={key: bisect.bisect(prcentiles_x,var1[key]) for key in big}
    prcentile_yindex={key: bisect.bisect(prcentiles_y,av2[key]) for key in big}

#    #plot subset       
#    i=1001
#    for key in big:
#        i+=1
#        plot(prcentile_xindex[key],prcentile_yindex[key],'b.')