def Main():
    """Run one round of simultaneous redditor/item updates and plot both.

    Builds a Redditor from a Beta(2, 1) PMF rescaled by 100 and an Item over
    the integers 0..100, updates each with an 'up' vote weighted by the
    other's prior mean, prints both means, and plots the two posteriors.
    """
    # Redditor with some trustworthiness (mean_t = 0.67 per the original note).
    redditor = Redditor(name='redditor')
    trust_pmf = thinkbayes.Beta(2, 1).MakePmf()
    for fraction, weight in trust_pmf.Items():
        redditor.Set(fraction * 100, weight)

    # New item with unknown quality (mean_q = 0.5 per the original note).
    link = Item(range(0, 101), name='item')

    # Both means are read before either update, so neither update sees the
    # other's posterior.
    trust_mean = redditor.Mean() / 100.0
    quality_mean = link.Mean() / 100.0
    print(trust_mean)
    print(quality_mean)

    redditor.Update(('up', quality_mean))
    link.Update(('up', trust_mean))

    Summarize(link)

    # Display the posterior distributions on one figure.
    for dist in (redditor, link):
        myplot.Pmf(dist)
    myplot.Show()
def ExpoErlangDemo():
    """Update an Expo and an Erlang suite with the same data and plot both.

    A series is generated with MakeSeries, split at ``count`` and the split
    results are printed; only the first part (n, s1) is used for the updates.
    """
    count = 10
    first_rate, second_rate = 1, 2

    series = Series(MakeSeries(count, first_rate, count, second_rate))
    n, s1, m, s2 = series.Split(count)
    print('%s %s %s %s' % (n, s1, m, s2))

    # Candidate rate values shared by both suites.
    lams = numpy.linspace(0.01, 5.01, 101)

    expo_suite = Expo(lams)
    expo_suite.name = 'expo'
    expo_suite.Update((n, s1))

    erlang_suite = Erlang(lams)
    erlang_suite.name = 'erlang'
    erlang_suite.Update((n, s1))

    for suite in (expo_suite, erlang_suite):
        myplot.Pmf(suite)
    myplot.Show()
def main():
    """Summarize and plot the BRFSS height data for two groups.

    Reads respondent records from the 'res' directory, prints the trimmed
    mean, variance, standard deviation and coefficient of variation for each
    group, then shows a histogram and a PMF per group and finally both CDFs
    on a single figure.
    """
    def show_then_clear():
        # Display the current figure, then reset it for the next plot.
        myplot.Show()
        myplot.Clf()

    respondents = brfss.Respondents()
    respondents.ReadRecords(data_dir='res')
    heights = respondents.SummarizeHeight()
    # presumably keys 1 and 2 are the survey's codes for men and women — confirm
    men = heights[1]
    women = heights[2]

    # Men: mu, var, sigma and coefficient of variation (CV).
    men_mu, men_var = thinkstats.TrimmedMeanVar(men)
    men_sigma = math.sqrt(men_var)
    men_cv = men_sigma / men_mu
    print("man: mu = %.3f, var = %.3f, sigma = %.3f, cv = %.3f"
          % (men_mu, men_var, men_sigma, men_cv))

    # Women: mu, var, sigma and coefficient of variation (CV).
    women_mu, women_var = thinkstats.TrimmedMeanVar(women)
    women_sigma = math.sqrt(women_var)
    women_cv = women_sigma / women_mu
    print("lady: mu = %.3f, var = %.3f, sigma = %.3f, cv = %.3f"
          % (women_mu, women_var, women_sigma, women_cv))

    # Histograms for men and women.
    men_hist = Pmf.MakeHistFromList(men, name='man hist')
    myplot.Hist(men_hist)
    show_then_clear()

    women_hist = Pmf.MakeHistFromList(women, name='lady hist')
    myplot.Hist(women_hist)
    show_then_clear()

    # PMFs for men and women.
    men_pmf = Pmf.MakePmfFromHist(men_hist, name='man pmf')
    myplot.Pmf(men_pmf)
    show_then_clear()

    women_pmf = Pmf.MakePmfFromHist(women_hist, name='lady pmf')
    myplot.Pmf(women_pmf)
    show_then_clear()

    # Cumulative distributions for both groups on one figure.
    men_cdf = Cdf.MakeCdfFromPmf(men_pmf, name='man cdf')
    women_cdf = Cdf.MakeCdfFromPmf(women_pmf, name='lady cdf')
    myplot.Cdfs((men_cdf, women_cdf), complement=False, transform=None)
    myplot.Show()
def main():
    """Estimate a parameter from several samples and save a 2x2 plot grid.

    One posterior is computed from the fixed sample taken from the book and
    one each from random exponential samples of size 10, 20 and 40; all are
    computed from the same uniform prior and plotted in separate subplots.
    """
    rate = 1.2
    # Uniform prior.
    prior = MakeUniformSuite(0.5, 1.5, 1000)

    # Try out the sample in the book first.
    book_sample = [2.675, 0.198, 1.152, 0.787, 2.717, 4.269]
    posteriors = [
        EstimateParameter(prior, book_sample, 'post%d' % len(book_sample)),
    ]

    # Then a range of sample sizes drawn with the known rate.
    for size in (10, 20, 40):
        draws = [random.expovariate(rate) for _ in range(size)]
        posteriors.append(EstimateParameter(prior, draws, 'post%d' % size))

    # One subplot per posterior; subplot indices are 1-based.
    for panel, posterior in enumerate(posteriors, 1):
        pyplot.subplot(2, 2, panel)
        myplot.Pmf(posterior)
        pyplot.xlabel('lambda')
        pyplot.ylabel('Posterior probability')
        pyplot.legend()

    myplot.Save(root='posteriors')
def process(data):
    """Plot the histogram, PMF and CDF of *data* against a normal model.

    After showing the histogram and the PMF, the empirical CDF is drawn and
    overlaid with normal-CDF values evaluated at points returned by
    ``normal_sample`` for the data's mean and standard deviation.
    """
    # Histogram.
    hist = Pmf.MakeHistFromList(data, name='hist')
    myplot.Hist(hist, color='blue')
    myplot.Show()

    # PMF.
    myplot.Pmf(Pmf.MakePmfFromHist(hist, name='pmf'), color='yellow')
    myplot.Show()
    myplot.Clf()

    # CDF of the actual data.
    empirical_cdf = Cdf.MakeCdfFromList(data, name='loafs')
    myplot.Cdf(empirical_cdf)

    mu, var = thinkstats.MeanVar(data)
    sigma = math.sqrt(var)
    print("mu = %.3f, sigma = %.3f" % (mu, sigma))

    # Normal model: evaluate its CDF at sampled points (the data itself
    # could be used as the x values instead).
    sample_xs = normal_sample(len(data), mu, sigma)
    model_ps = [erf.NormalCdf(x, mu=mu, sigma=sigma) for x in sample_xs]
    myplot.Scatter(sample_xs, model_ps, color='red', label='sample')
    myplot.Show()
def main():
    """Plot and save the running-speed distribution as biased by BiasPmf.

    The PMF of actual speeds is passed through ``BiasPmf`` with the value
    7.5; the result is shown as a PMF, saved as a histogram under the root
    'observed_speeds', and saved as a CDF under 'observed_speeds_cdf'.
    """
    speeds = relay.GetSpeeds(relay.ReadResults())

    # Distribution of actual speeds (not plotted itself).
    actual = Pmf.MakePmfFromList(speeds, 'actual speeds')

    # Biased distribution seen by the observer.
    observed = BiasPmf(actual, 7.5, name='observed speeds')

    myplot.Pmf(observed)
    myplot.Show(title='soln. PMF of running speed',
                xlabel='speed (mph)',
                ylabel='probability')

    myplot.Clf()
    myplot.Hist(observed)
    myplot.Save(root='observed_speeds',
                title='PMF of running speed',
                xlabel='speed (mph)',
                ylabel='probability')

    observed_cdf = Cdf.MakeCdfFromPmf(observed)

    myplot.Clf()
    myplot.Cdf(observed_cdf)
    myplot.Save(root='observed_speeds_cdf',
                title='CDF of running speed',
                xlabel='speed (mph)',
                ylabel='cumulative probability')
def observe_data(l, name=None, show=False):
    """Print quartile and mean statistics for a sample and optionally plot it.

    Args:
        l: a list of values, a ``Pmf.Pmf`` or a ``Cdf.Cdf``.
        name: label used in the printed summary.  For a list it also prefixes
            the generated ' cdf' / ' pmf' object names.  When omitted, a list
            is labelled 'data' and a Pmf/Cdf falls back to its own ``name``.
        show: when true, plot the PMF (if one is available) and then the CDF.

    Raises:
        TypeError: if ``l`` is none of the supported types.
    """
    pmf = None
    if isinstance(l, list):
        if name is None:
            # A list carries no name of its own; supply one so that the
            # label concatenation below cannot fail on None + str.
            name = 'data'
        cdf = Cdf.MakeCdfFromList(l, name + ' cdf')
        pmf = Pmf.MakePmfFromList(l, name + ' pmf')
    elif isinstance(l, Pmf.Pmf):
        pmf = l
        cdf = Cdf.MakeCdfFromPmf(l)
        if name is None:
            name = pmf.name
    elif isinstance(l, Cdf.Cdf):
        # No PMF is derived from a Cdf input, so only the CDF can be plotted.
        cdf = l
        if name is None:
            name = cdf.name
    else:
        raise TypeError('input parameter type is wrong')

    v_25 = cdf.Percentile(25)
    median = cdf.Percentile(50)
    v_75 = cdf.Percentile(75)
    mean = cdf.Mean()
    # Values in parentheses are the gaps: median - Q1, mean - median and
    # Q3 - median.
    print('%s: 1/4:%4.2f(%4.2f), 1/2:%4.2f(mean-median:%4.2f), mean:%4.2f, 3/4:%4.2f(%4.2f)'
          % (name, v_25, median - v_25, median, mean - median, mean,
             v_75, v_75 - median))

    if show:
        if pmf is not None:
            myplot.Pmf(pmf)
            myplot.Show()
        myplot.Cdf(cdf)
        myplot.Show()
def PlotPmf(results):
    """Show the PMF of the speeds that GetSpeeds extracts from *results*."""
    speed_pmf = Pmf.MakePmfFromList(GetSpeeds(results), 'speeds')
    plot_options = dict(
        title='PMF of running speed',
        xlabel='speed (mph)',
        ylabel='probability',
        show=True,
    )
    myplot.Pmf(speed_pmf, **plot_options)
def main():
    """Read the results, build the PMF of their speeds and display it."""
    speed_pmf = Pmf.MakePmfFromList(GetSpeeds(ReadResults()), 'speeds')
    myplot.Pmf(speed_pmf)

    labels = dict(
        title='PMF of running speed',
        xlabel='speed (mph)',
        ylabel='probability',
    )
    myplot.Show(**labels)
def main():
    """Update a Train suite over hypotheses 100..1000 with 321 and plot it.

    The posterior mean is printed before the posterior PMF is shown.
    """
    train = Train(xrange(100, 1001))
    train.Update(321)
    print(train.Mean())

    myplot.Pmf(train)
    myplot.Show()
def main():
    """Plot the PMF and then the CDF of 1000 random values in [0, 100)."""
    samples = [100 * random.random() for _ in range(1000)]

    sample_pmf = Pmf.MakePmfFromList(samples, name='pfm')
    sample_cdf = Cdf.MakeCdfFromList(samples, name='cdf')

    myplot.Pmf(sample_pmf)
    myplot.Show()

    # Start the CDF on a clean figure.
    myplot.Clf()
    myplot.Cdf(sample_cdf)
    myplot.Show()
def PlotSurvivalCurve(ts, lams, ss):
    """Save a figure with the scaled ``lams`` PMF and the survival curve.

    Args:
        ts: x values for the survival curve.
        lams: PMF-like object; it is rescaled in place via
            ``Normalize(max probability)`` before plotting.
        ss: y values for the survival curve, plotted against ``ts``.
    """
    # Scale lams by its largest probability.
    peak = max(lams.Probs())
    lams.Normalize(peak)

    dashed_grey = dict(linewidth=2, linestyle='dashed', color='0.7')
    myplot.Pmf(lams, line_options=dashed_grey)

    pyplot.plot(ts, ss, linewidth=2, color='blue', label='survival')

    myplot.Save(root='seer1',
                title='',
                xlabel='Survival time (years)',
                ylabel='Probability')
def main():
    """Update a Euro suite with 140 'H' then 110 'T' and plot the result."""
    suite = Euro(range(0, 101))

    # Same update sequence as two consecutive loops: every 'H' first, then
    # every 'T'.
    for outcome in 'H' * 140 + 'T' * 110:
        suite.Update(outcome)

    myplot.Pmf(suite)
    myplot.Show()
def MakeFigures(exam, alice, bob):
    """Save PNG figures of the exam prior and of the two posteriors.

    Args:
        exam: object whose ``prior`` attribute is plotted to 'sat_prior'.
        alice: first distribution plotted to 'sat_posterior'.
        bob: second distribution plotted to 'sat_posterior'.
    """
    png_only = ['png']

    myplot.Pmf(exam.prior, label='prior')
    myplot.Save(root='sat_prior', formats=png_only, xlabel='p', ylabel='PMF')

    # Reset the figure so the posteriors are not drawn over the prior.
    myplot.Clf()
    myplot.Pmfs([alice, bob])
    myplot.Save(root='sat_posterior',
                formats=png_only,
                xlabel='p',
                ylabel='PMF')
def Main():
    """Update a Liar(y=0.1) suite with a single 'H', summarize and plot it."""
    liar = Liar(y=0.1)

    # The dataset is a string; each character is one observation.
    for observation in 'H':
        liar.Update(observation)

    Summarize(liar)

    myplot.Pmf(liar)
    myplot.Show()
def main():
    """Plot the posterior for a coin after the evidence (140, 110).

    A uniform suite on [0.0, 1.0] with 1001 points is updated with the
    evidence pair, renamed 'posterior' and shown.
    """
    coin = MakeUniformSuite(0.0, 1.0, 1001)
    heads_tails = (140, 110)
    Update(coin, heads_tails)
    coin.name = 'posterior'

    # Plot the posterior distribution.
    myplot.Pmf(coin)
    labels = dict(title='Biased coin',
                  xlabel='P(heads)',
                  ylabel='Posterior probability')
    myplot.Show(**labels)
def main(script):
    """Print Alice's raw score and show the exam's prior distribution.

    Args:
        script: unused by this function.
    """
    # Exam object with data from the 2010 SAT (per the original note).
    exam = Exam()

    # Look up the raw score for Alice's scaled score of 780.
    scaled_score = 780
    raw_score = exam.GetRawScore(scaled_score)
    print('Alice raw score %s' % (raw_score,))

    # Display the distribution of raw scores for the population.
    myplot.Pmf(exam.GetPrior(), show=True)
def ExpoDemo():
    """Update one Expo suite per half of a split series and plot both.

    The series comes from MakeSeries and is split at ``count``; the first
    suite is updated with (n, s1) and the second with (m, s2).
    """
    count = 10
    first_rate, second_rate = 1, 2

    series = Series(MakeSeries(count, first_rate, count, second_rate))
    n, s1, m, s2 = series.Split(count)
    print('%s %s %s %s' % (n, s1, m, s2))

    # Candidate rate values shared by both suites.
    lams = numpy.linspace(0.01, 5.01, 101)

    before = Expo(lams)
    before.Update((n, s1))

    after = Expo(lams)
    after.Update((m, s2))

    for suite in (before, after):
        myplot.Pmf(suite)
    myplot.Show()
def PlotPosteriorSigma(posterior):
    """Print the 90% credible interval of *posterior* and plot it.

    The interval is drawn with PlotCredibleInterval on a cleared figure and
    the posterior PMF is then added on top (``clf=False``) and shown.
    """
    interval = CredibleInterval(posterior, 90)
    print('CI: %s' % (interval,))

    pyplot.clf()
    PlotCredibleInterval(posterior, interval)

    pmf_options = dict(
        root='sigma',
        clf=False,  # keep the interval that was just drawn
        title='Posterior PMF',
        xlabel='sigma',
        ylabel='probability',
        show=True,
    )
    myplot.Pmf(posterior, **pmf_options)
def main():
    """Plot the decay-parameter posterior and print two point estimates.

    A uniform suite on [0.001, 1.5] with 1000 points is updated with six
    observations.  After the plot, the reciprocal of the sample mean and the
    posterior mean are printed.
    """
    observations = [1.5, 2, 3, 4, 5, 12]

    posterior = MakeUniformSuite(0.001, 1.5, 1000)
    Update(posterior, observations)
    posterior.name = 'posterior'

    # Plot the posterior distribution.
    myplot.Pmf(posterior)
    myplot.Show(title='Decay parameter',
                xlabel='Parameter (inverse cm)',
                ylabel='Posterior probability')

    naive_estimate = 1.0 / thinkstats.Mean(observations)
    print('Naive parameter estimate: %s' % (naive_estimate,))
    print('Mean of the posterior distribution: %s' % (posterior.Mean(),))
def main():
    """Run exercises 3.1 (class-size bias) and 3.2 (biased running speeds).

    Exercise 3.1 prints the mean class size of the actual distribution, of
    the size-weighted distribution and of the result of ``UnbiasPmf``, then
    plots the first two.  Exercise 3.2 plots the relay speeds after
    ``BiasPmf`` with the value 7.5.
    """
    # Exercise 3.1
    class_sizes = {
        7: 8,
        12: 8,
        17: 14,
        22: 4,
        27: 6,
        32: 12,
        37: 8,
        42: 3,
        47: 2,
    }

    actual = Pmf.MakePmfFromDict(class_sizes, name='Actual')
    print(actual.Mean())

    # Weight each class size by itself, then renormalize.
    observed = actual.Copy(name='Student Perspective')
    # Iterate over a snapshot so Mult never runs against a live view.
    for size, _ in list(observed.Items()):
        observed.Mult(size, size)
    observed.Normalize()
    print(observed.Mean())

    unbiased = UnbiasPmf(observed, 'Student Unbiased')
    print(unbiased.Mean())

    by_value = itemgetter(0)
    # Unpack into x/y sequences instead of subscripting zip(), which only
    # works where zip returns a list.
    actual_xs, actual_ps = zip(*sorted(actual.Items(), key=by_value))
    observed_xs, observed_ps = zip(*sorted(observed.Items(), key=by_value))

    plt.plot(actual_xs, actual_ps, 'g-', label='Actual')
    plt.plot(observed_xs, observed_ps, 'r-', label='Student Perspective')
    plt.legend(loc=4)
    plt.xlabel('Class Size')
    plt.ylabel('Probability')
    plt.show()

    # Exercise 3.2
    speeds = relay.GetSpeeds(relay.ReadResults())
    speeds_pmf = Pmf.MakePmfFromList(speeds, 'speeds')
    biased_pmf = BiasPmf(speeds_pmf, 7.5, '7.5 mph biased speeds')

    myplot.Pmf(biased_pmf)
    myplot.Show(title='7.5mph biased speeds',
                xlabel='speeds (mph)',
                ylabel='probability')
def main():
    """Show four views of a 1000-element random sample.

    In order: the CDF as a line, the CDF's rendered points as a scatter
    plot, the histogram and the PMF.  Each view is shown separately.
    """
    sample_pmf = Pmf.MakePmfFromList(generate_random_sample(1000))
    sample_cdf = Cdf.MakeCdfFromPmf(sample_pmf)

    myplot.Cdf(sample_cdf)
    myplot.show()

    # Render() output is unpacked into the positional scatter arguments.
    rendered = sample_cdf.Render()
    myplot.scatter(*rendered)
    myplot.show()

    myplot.Hist(sample_pmf)
    myplot.show()

    myplot.Pmf(sample_pmf)
    myplot.show()
def main(script, *args):
    """Plot the CDF of the UniformOdds() distribution.

    NOTE(review): the function returns right after that plot, so the Beta
    section that follows the ``return`` never executes.  It is kept as-is
    because it is unclear whether the early return is intentional.

    Args:
        script: unused by this function.
        *args: unused by this function.
    """
    odds_cdf = Cdf.MakeCdfFromPmf(UniformOdds())
    myplot.Cdf(odds_cdf, show=True)
    return

    # --- unreachable from here on (see NOTE above) ---
    beta = Beta(1, 0)
    myplot.Pmf(beta.Pmf(),
               show=True,
               xlabel='Probability of sunrise: p',
               ylabel='Probability density',
               title='Beta distribution')

    beta_cdf = beta.Cdf()
    print(beta_cdf.Percentile(5))
    print(beta_cdf.Percentile(95))
    print(beta_cdf.Prob(0.5))
def PlotPosteriorPmf(self, root=None, clf=False):
    """Plot the posterior PMF with the 5th-95th percentile range shaded.

    Args:
        root: passed through to ``myplot.Pmf``; a truthy value also forces
            the figure to be cleared first.
        clf: when true, clear the current figure before plotting.
    """
    # A truthy root implies clearing, exactly like an explicit clf.
    if root or clf:
        pyplot.clf()

    posterior = self.Pmf()
    cdf = self.Cdf()
    low = cdf.Percentile(5)
    high = cdf.Percentile(95)

    # Points of the posterior that fall inside [low, high].
    band = [(x, posterior.Prob(x))
            for x in posterior.Values()
            if low <= x <= high]
    band_xs = [x for x, _ in band]
    band_ps = [p for _, p in band]

    pyplot.fill_between(band_xs, band_ps, y2=0.0001, color='blue', alpha=0.2)

    # The figure has already been cleared if needed, so clf stays False here.
    myplot.Pmf(posterior,
               root=root,
               clf=False,
               xlabel='# of taxa',
               ylabel='prob',
               legend=False)
def main():
    """Solve the locomotive problem for evidence 60 and save the plot.

    A uniform prior over 1..200 is copied, the copy is updated with the
    evidence, its 90% credible interval is printed and the posterior is
    saved under the root 'locomotive'.
    """
    max_trains = 200
    observed = 60

    prior = MakeUniformSuite(1, max_trains, max_trains)
    prior.name = 'prior'

    # Update a copy so the prior itself is left untouched.
    posterior = prior.Copy()
    Update(posterior, observed)
    posterior.name = 'posterior'

    print(CredibleInterval(posterior, 90))

    # Plot the posterior distribution.
    pyplot.subplots_adjust(wspace=0.4, left=0.15)
    myplot.Pmf(posterior, linewidth=2)
    myplot.Save(root='locomotive',
                title='Locomotive problem',
                xlabel='Number of trains',
                ylabel='Posterior probability')
def main():
    """Run exercises 3.9 (resampled birth weights) and 3.10 (random values).

    Exercise 3.9 collects the weight in ounces of every pregnancy record
    with ``outcome == 1`` whose pound and ounce fields are both integers and
    whose total is at most 200 oz, resamples 1000 values from the resulting
    CDF via ``sample`` and plots the CDF of the resample.  Exercise 3.10
    plots the PMF and the CDF of 1000 ``random.random()`` values.
    """
    # Exercise 3.9
    table = survey.Pregnancies()
    table.ReadRecords()

    live_weights_oz = []
    for record in table.records:
        if record.outcome != 1:
            continue
        lbs, oz = record.birthwgt_lb, record.birthwgt_oz
        # Skip records whose pound or ounce field is not an integer.
        if not (isinstance(lbs, int) and isinstance(oz, int)):
            continue
        total_oz = lbs * 16 + oz
        # Totals above 200 oz are discarded.
        if total_oz <= 200:
            live_weights_oz.append(total_oz)

    weights_cdf = Cdf.MakeCdfFromList(live_weights_oz,
                                      name="live birth weights")
    resampled = sample(weights_cdf, 1000)
    myplot.Cdf(Cdf.MakeCdfFromList(resampled))
    myplot.Show(title="CDF of live births resampled")

    # Exercise 3.10
    random_values = [random.random() for _ in range(1000)]
    myplot.Pmf(Pmf.MakePmfFromList(random_values))
    myplot.Show(title="random pmf")
    myplot.Cdf(Cdf.MakeCdfFromList(random_values))
    myplot.Show(title="random cdf")
def main():
    """Plot the PMF of word counts on log-log axes.

    Command-line options:
        --infile / -i: file to scan; defaults to standard input.
        --outfile / -o: file opened for writing; nothing is written to it by
            this function.

    The input is passed to ``scan_file`` and the values of the dictionary it
    returns are turned into a PMF for plotting.
    """
    parser = optparse.OptionParser()
    parser.add_option('--infile', '-i')
    parser.add_option('--outfile', '-o')
    options, _ = parser.parse_args()

    ifile = sys.stdin
    # Only files opened here are closed afterwards; the standard streams
    # are left alone.
    opened = []
    try:
        if options.infile is not None:
            ifile = open(options.infile, 'r')
            opened.append(ifile)
        if options.outfile is not None:
            # Still opened so that --outfile keeps creating/truncating the
            # target file as before, even though nothing is written to it.
            opened.append(open(options.outfile, 'w'))

        word_count_dict = scan_file(ifile)
    finally:
        for handle in opened:
            handle.close()

    pmf = Pmf.MakePmfFromList(word_count_dict.values())

    myplot.Pmf(pmf)
    myplot.Show(title='KJV Biblical word frequency',
                xscale='log',
                yscale='log')
import math

import numpy

import Cdf
import Pmf
import myplot
import populations


def _bucket(population):
    """Round *population* down to a multiple of 50."""
    return 50 * math.floor(population / 50.0)


pops = populations.ReadData()

# Materialize the buckets as a list: they are consumed three times below.
bucketed_pops = list(map(_bucket, pops))

pmf = Pmf.MakePmfFromList(bucketed_pops)
cdf = Cdf.MakeCdfFromPmf(pmf)

myplot.Pmf(pmf)
myplot.Show(title="Pmf of populations", xscale='log')

myplot.Cdf(cdf)
myplot.Show(title="Cdf of populations", inverse=True, xscale='log')

# Normal probability plot: sorted standard-normal draws against the sorted
# bucketed populations.
xs = sorted(numpy.random.normal(0, 1, len(pops)))
ys = sorted(bucketed_pops)
myplot.Plot(xs, ys)
myplot.Show(title="Normal plot for populations")

# Same plot against log10(population + 1); the +1 keeps a zero bucket finite.
ys2 = sorted(math.log10(y + 1) for y in bucketed_pops)
myplot.Plot(xs, ys2)
myplot.Show(title="LogNormal plot for populations")

# It looks more like a lognormal, but with a hard lower bound (as expected).
def MakePmf(table):
    """Show the PMF of ``prglength`` over all records in *table*."""
    weeks = [rec.prglength for rec in table.records]
    length_pmf = Pmf.MakePmfFromList(weeks, name='pregnancy length')

    plot_options = dict(show=True, xlabel='weeks', ylabel='probability')
    myplot.Pmf(length_pmf, **plot_options)
# Example 3-10: plot the PMF and the CDF of a batch of random.random() values.
import random

import Cdf
import Pmf
import myplot

# Number of random values to draw.
size = 10000

lst = [random.random() for i in range(size)]
lst_pmf = Pmf.MakePmfFromList(lst)
lst_cdf = Cdf.MakeCdfFromList(lst)

# PMF on a fresh figure.
myplot.Clf()
myplot.Pmf(lst_pmf)
myplot.Show(title='PMF of {0} randoms'.format(size))

# CDF on a fresh figure.
myplot.Clf()
myplot.Cdf(lst_cdf)
myplot.Show(title='CDF of {0} randoms'.format(size))

# Yes, the distribution is uniform.