def RunTest(N, times, p, significance=0.05):
    """Count how often a fair-coin hypothesis is rejected per sample size.

    For every sample size ``n`` in ``N`` the coin is flipped ``n`` times,
    the number of heads is tested against a fair coin (probability 0.5)
    with ``binom_test``, and the whole experiment is repeated ``times``
    times.

    Args:
        N: Iterable of sample sizes (number of flips per experiment).
        times: Number of experiments to run for each sample size.
        p: Forwarded to ``Coin(p)``; presumably the probability of heads
            (confirm against ``Coin``).
        significance: Threshold below which a p-value counts as a
            rejection.  Defaults to 0.05, the previously hard-coded value.

    Returns:
        dict mapping each sample size to the number of experiments (out of
        ``times``) whose p-value was below ``significance``.
    """
    coin = Coin(p)
    # Materialise N once: a one-shot iterable would otherwise be exhausted
    # by dict.fromkeys() and the loop below would never run.
    sizes = list(N)
    rejections = dict.fromkeys(sizes)
    for n in sizes:
        rejected = 0
        for _run in range(times):
            heads = 0
            for _flip in xrange(n):
                if coin.next() == "head":
                    heads += 1
            if binom_test(heads, n, 0.5) < significance:
                rejected += 1
        rejections[n] = rejected
    return rejections
def first_order_analysis(s, significance=0.05):
    """Test an 'L'/'R' sequence for fairness and first-order independence.

    The sequence is first tested against a fair process (P(L) = 0.5).
    Then 50 candidate values of P(L), evenly spaced over the interval
    returned by ``binofit``, are scanned: for each candidate the counts of
    'RL' and 'LL' pairs are tested against it, and the candidate with the
    largest (least significant) p-value is kept.  Independence is rejected
    only if even that best candidate is significant.

    Args:
        s: Non-empty sequence made only of the characters 'L' and 'R'.
        significance: Threshold used for every test and passed to
            ``binofit`` for the interval on P(L).

    Returns:
        A list of ``(name, value)`` pairs, in a fixed order, holding the
        counts, the ratios (computed with ``zdiv``), the fairness test
        outcome, the interval bounds, the independence test outcome, the
        correlation sign ``why`` ('+', '-' or '') and the best candidate
        ``best_p_L``.

    Raises:
        ValueError: If ``s`` is empty or contains a character other than
            'L' or 'R'.
    """
    symbols = ['L', 'R']
    N = len(s)
    if N == 0:
        raise ValueError('Empty string')
    for x in s:
        if x not in symbols:
            raise ValueError('Unknown character %r in string.' % x)

    n_L = count_overlapping(s, 'L')
    n_R = count_overlapping(s, 'R')
    n_RL = count_overlapping(s, 'RL')
    n_LL = count_overlapping(s, 'LL')
    n_RR = count_overlapping(s, 'RR')
    n_LR = count_overlapping(s, 'LR')

    fair_pvalue = scipy.stats.binom_test(n_L, N, 0.5)
    fair_rejected = fair_pvalue < significance

    # Interval of plausible values for P(L).
    p_L_lb, p_L_ub = binofit(n_L, N, significance)

    # Scan candidate values of P(L) across the whole interval.
    ps = np.linspace(p_L_lb, p_L_ub, 50)
    pvalues = []
    whys = []
    for p in ps:
        # A candidate is judged by the more significant of the two tests.
        RL_pvalue_p = binom_test(n_RL, n_R, p)
        LL_pvalue_p = binom_test(n_LL, n_L, p)
        pvalues.append(min(RL_pvalue_p, LL_pvalue_p))

        # Direction of the deviation: each CDF is evaluated once and
        # compared against both tails.
        LL_cdf = binom.cdf(n_LL, n_L, p)
        RL_cdf = binom.cdf(n_RL, n_R, p)
        # Too many 'LL' pairs means repeats are favoured (positive); too
        # many 'RL' pairs means alternation is favoured (negative).
        LL_significantly_positive = LL_cdf > 1 - significance
        LL_significantly_negative = LL_cdf < significance
        RL_significantly_negative = RL_cdf > 1 - significance
        RL_significantly_positive = RL_cdf < significance

        # Negative takes precedence when both directions are flagged.
        if LL_significantly_negative or RL_significantly_negative:
            correlation = '-'
        elif LL_significantly_positive or RL_significantly_positive:
            correlation = '+'
        else:
            correlation = ''
        whys.append(correlation)

    # Keep the candidate that explains the data best.
    best = np.argmax(pvalues)
    best_p = ps[best]
    indep_pvalue = pvalues[best]
    indep_rejected = indep_pvalue < significance
    why = whys[best] if indep_rejected else ''

    return [
        ('significance', significance),
        ('N', N),
        ('n_L', n_L),
        ('p_L', zdiv(n_L, N)),
        ('n_R', n_R),
        ('p_R', zdiv(n_R, N)),
        ('n_RL', n_RL),
        ('p_RL', zdiv(n_RL, n_R)),
        ('n_LL', n_LL),
        ('p_LL', zdiv(n_LL, n_L)),
        ('n_RR', n_RR),
        ('p_RR', zdiv(n_RR, n_R)),
        ('n_LR', n_LR),
        ('p_LR', zdiv(n_LR, n_L)),
        ('fair_pvalue', fair_pvalue),
        ('fair_rejected', fair_rejected),
        ('p_L_lb', p_L_lb),
        ('p_L_ub', p_L_ub),
        ('indep_pvalue', indep_pvalue),
        ('indep_rejected', indep_rejected),
        ('why', why),
        ('best_p_L', best_p),
    ]
def binom_p(self):
    """Return the ``binom_test`` p-value for this object's goal count.

    The values of ``self.goals()``, ``self.n()`` and
    ``self.exp_goals_frac()`` are passed to ``binom_test`` in that order;
    presumably these are the observed successes, the number of trials and
    the expected success probability (confirm against the class).
    """
    observed = self.goals()
    trials = self.n()
    expected_frac = self.exp_goals_frac()
    return binom_test(observed, trials, expected_frac)
def las_vegas_report(outdir, page_id, results):
    """Build and write the HTML report of filter responses for each kernel.

    For every kernel the report contains a histogram of the left/right
    responses above threshold, a log histogram of the absolute response
    with the threshold marked, and two performance tables.  The threshold
    ``eps`` is the 75th percentile of the absolute response.

    Args:
        outdir: Directory that receives ``<report id>.html`` and the
            ``images`` resources directory.
        page_id: Suffix appended to ``'lasvegas_'`` to form the report id.
        results: Mapping from kernel name to a mapping with the keys
            ``'signs'``, ``'response'`` and ``'kernel'``.  ``signs`` and
            ``response`` are assumed to be equal-length NumPy arrays, with
            ``signs`` equal to +1 for left and -1 for right saccades
            (confirm against the caller).
    """

    def ratio2perc(count, total):
        """Format count/total as a percentage with one decimal."""
        if total == 0:
            # An empty group (e.g. no right saccades, or no response above
            # threshold) used to raise ZeroDivisionError and abort the
            # whole report.
            return "nan"
        return "%.1f" % (100.0 * count / total)

    def perc(x):
        """Percentage of true (nonzero) entries in x."""
        pos, = numpy.nonzero(x)
        return ratio2perc(len(pos), len(x))

    r = Report('lasvegas_' + page_id)
    f = r.figure('summary', cols=4, caption='Response to various filters')
    f_overlap = r.figure('summary-overlap', cols=4,
                         caption='Response area (overlap) of various filters')

    for kernel in sorted(results.keys()):
        sign = results[kernel]['signs']
        response = results[kernel]['response']
        overlap = numpy.abs(response)
        # Threshold under which a response counts as "no response".
        eps = percentile(overlap, 75)
        matched_filter = results[kernel]['kernel']

        left = numpy.nonzero(sign == +1)
        right = numpy.nonzero(sign == -1)
        response_right = response[right]
        response_left = response[left]

        n = r.node(kernel)

        with n.data_pylab('response') as pylab:
            # Half-width of the histogram range.
            try:
                b = numpy.percentile(response_left, 95)  #@UndefinedVariable
            except Exception:
                # Best-effort fallback, e.g. when numpy.percentile is not
                # available.  Not a bare except, so KeyboardInterrupt and
                # SystemExit still propagate.
                b = scipy.stats.scoreatpercentile(response_left, 95)

            def plothist(x, nbins, eps, **kwargs):
                # Keep only the samples whose response exceeds eps.
                nz, = numpy.nonzero(numpy.abs(x) > eps)
                print("using %d/%d" % (len(nz), len(x)))
                xnz = x[nz]
                hist, bin_edges = numpy.histogram(xnz, range=(-b, b),
                                                  bins=nbins)
                # Plot against the bin centres.
                bins = (bin_edges[:-1] + bin_edges[1:]) * 0.5
                pylab.plot(bins, hist, **kwargs)

            nbins = 500
            plothist(response_left, nbins, eps, label='left')
            plothist(response_right, nbins, eps, label='right')
            a = pylab.axis()
            pylab.axis([-b, b, 0, a[-1]])
            pylab.legend()
        f.sub('%s/response' % kernel)

        with n.data_pylab('overlap') as pylab:
            nbins = 200
            pylab.hist(overlap, nbins, log=True, label='hist of abs.response')
            a = pylab.axis()
            # Vertical line marking the threshold.
            pylab.plot([eps, eps], [a[2], a[3]], 'r-', label='threshold')
            pylab.legend()
        f_overlap.sub('%s/overlap' % kernel)

        cols = ['probability', 'no response', 'guessed L', 'guessed R']
        rows = ['left saccade', 'right saccade']
        table = [
            [
                perc(sign == +1),
                perc(numpy.abs(response_left) < eps),
                perc(response_left > eps),
                perc(response_left < -eps),
            ],
            [
                perc(sign == -1),
                perc(numpy.abs(response_right) < eps),
                perc(response_right > eps),
                perc(response_right < -eps),
            ],
        ]
        n.table('performance', data=table, rows=rows, cols=cols)

        use_eps = eps
        total = len(sign)
        given = numpy.abs(response) > use_eps
        num_given = len(numpy.nonzero(given)[0])
        # A guess is correct when the sign of the response matches the
        # sign of the saccade.
        correct = numpy.logical_or(
            numpy.logical_and(response > use_eps, sign == +1),
            numpy.logical_and(response < -use_eps, sign == -1),
        )
        num_correct = len(numpy.nonzero(correct)[0])

        perc_given = ratio2perc(num_given, total)
        perc_not_given = ratio2perc(total - num_given, total)
        perc_correct_given = ratio2perc(num_correct, num_given)

        # Fraction of correct guesses expected under H0 (chance, p = 0.5).
        signif = 0.01
        expected = scipy.stats.binom.ppf(
            [signif / 2, 1 - signif / 2], num_given, 0.5) / num_given
        pvalue = binom_test(num_correct, num_given, 0.5)

        cols = ['no response', 'with response', 'correct (%given)',
                'p-value', 'bounds under H0']
        table = [
            [
                perc_not_given,
                perc_given,
                perc_correct_given,
                "%.4f" % pvalue,
                "[%.1f, %.1f]" % (100 * expected[0], 100 * expected[1]),
            ],
        ]
        n.table('performance2', data=table, cols=cols)

        add_posneg(n, 'kernel', matched_filter)

    output_file = os.path.join(outdir, '%s.html' % r.id)
    resources_dir = os.path.join(outdir, 'images')
    print("Writing to %s" % output_file)
    r.to_html(output_file, resources_dir=resources_dir)