def RunLoop(gap_times, nums, lam=0.0333):
    """Plots P(y > 15 min) against the number of passengers.

    The observed gaps are resampled, biased, padded with a few long delays,
    smoothed and unbiased again.  The resulting distribution feeds a single
    WaitTimeCalculator, which is reused by one ElapsedTimeEstimator per
    entry of nums.

    gap_times: sequence of float
    nums: sequence of values for num_passengers
    lam: arrival rate in passengers per second

    Returns: None; the figure is written through thinkplot.Save
    """
    global UPPER_BOUND
    UPPER_BOUND = 4000

    thinkplot.Clf()
    RandomSeed(18)

    # resample gap_times
    sample_size = 220
    resampled = thinkbayes.MakeCdfFromList(gap_times).Sample(sample_size)
    pmf_z = thinkbayes.MakePmfFromList(resampled)

    # compute the biased pmf and add some long delays
    biased_cdf = BiasPmf(pmf_z).MakeCdf()
    long_delays = [1800, 2400, 3000]
    sample_zb = biased_cdf.Sample(sample_size) + long_delays

    # smooth the distribution of zb
    smoothed = thinkbayes.EstimatedPdf(sample_zb)
    pmf_zb = smoothed.MakePmf(MakeRange(low=60))

    # unbias the distribution of zb and make wtc
    pmf_z = UnbiasPmf(pmf_zb)
    wtc = WaitTimeCalculator(pmf_z)

    def prob_long_wait(num_passengers):
        # posterior prob of waiting more than 15 minutes (900 seconds)
        ete = ElapsedTimeEstimator(wtc, lam, num_passengers)
        return 1 - ete.pmf_y.MakeCdf().Prob(900)

    probs = [prob_long_wait(num_passengers) for num_passengers in nums]

    thinkplot.Plot(nums, probs)
    thinkplot.Save(
        root='redline5',
        xlabel='Num passengers',
        ylabel='P(y > 15 min)',
        formats=FORMATS,
    )
def PlotPosterior(suite, pcolor=False, contour=True):
    """Draws the joint posterior as a contour and/or pcolor plot and saves it.

    suite: Suite that maps (mu, sigma) to probability; its name is used
           in the output file root
    pcolor: passed through to thinkplot.Contour
    contour: passed through to thinkplot.Contour
    """
    thinkplot.Clf()

    joint = suite.GetDict()
    thinkplot.Contour(joint, pcolor=pcolor, contour=contour)

    save_options = dict(
        title='Posterior joint distribution',
        xlabel='Mean height (cm)',
        ylabel='Stddev (cm)',
    )
    thinkplot.Save(root='variability_posterior_%s' % suite.name,
                   **save_options)
def MakePlots(player1, player2):
    """Generates two plots and prints summary statistics.

    price1 shows the priors for the two players
    price2 shows the distribution of diff for the two players

    Also prints the median of each player's diff distribution and each
    player's probability of overbidding.

    player1, player2: objects providing PmfPrice, CdfDiff and ProbOverbid
    """
    # plot the prior distribution of price for both players
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)
    pmf1 = player1.PmfPrice()
    pmf1.name = 'showcase 1'
    pmf2 = player2.PmfPrice()
    pmf2.name = 'showcase 2'
    thinkplot.Pmfs([pmf1, pmf2])
    thinkplot.Save(root='price1',
                   xlabel='price ($)',
                   ylabel='PDF',
                   formats=FORMATS)

    # plot the historical distribution of underness for both players
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)
    cdf1 = player1.CdfDiff()
    cdf1.name = 'player 1'
    cdf2 = player2.CdfDiff()
    cdf2.name = 'player 2'

    # The bare `print x, y` statement only parses on Python 2.  A
    # single-argument print() with %-formatting produces the same text on
    # both Python 2 and Python 3.
    print('Player median %s' % (cdf1.Percentile(50),))
    print('Player median %s' % (cdf2.Percentile(50),))

    print('Player 1 overbids %s' % (player1.ProbOverbid(),))
    print('Player 2 overbids %s' % (player2.ProbOverbid(),))

    thinkplot.Cdfs([cdf1, cdf2])
    thinkplot.Save(root='price2',
                   xlabel='diff ($)',
                   ylabel='CDF',
                   formats=FORMATS)
def PlotJointDist(self):
    """Saves a pcolor plot of the joint age-size distribution held in the cache."""
    thinkplot.Clf()

    age_size = self.cache.GetDistAgeSize()
    thinkplot.Contour(age_size, contour=False, pcolor=True)

    # tick positions for the log-scaled diameter axis
    diameter_ticks = MakeLogTicks([0.2, 0.5, 1, 2, 5, 10, 20])
    thinkplot.Save(
        root='kidney8',
        formats=FORMATS,
        axis=[0, 41, -0.7, 1.31],
        yticks=diameter_ticks,
        xlabel='ages',
        ylabel='diameter (cm, log scale)',
    )
def PlotSuites(suites, root):
    """Plots the Pmfs of a sequence of suites on one figure and saves it.

    suites: sequence of Suite objects
    root: string filename to write
    """
    num_suites = len(suites)

    thinkplot.Clf()
    thinkplot.PrePlot(num_suites)
    thinkplot.Pmfs(suites)

    labels = dict(xlabel='x', ylabel='Probability')
    thinkplot.Save(root=root, formats=['pdf', 'eps'], **labels)
def _test1(show=0):
    """Builds a Poisson Pmf for a known rate and optionally displays it.

    show: if true, plot the Pmf and print its total probability

    Returns: the Pmf built by thinkbayes2.MakePoissonPmf
    """
    # Given r, the distribution of n is the Poisson distribution.
    r = 150

    # MakePoissonPmf: the Pmf is cut off at an upper bound (the true
    # distribution is unbounded), so it needs to be normalized.
    pmf = thinkbayes2.MakePoissonPmf(r, 2 * r, 1)

    if show:
        thinkplot.Clf()
        thinkplot.Pmf(pmf)
        # y label previously read 'Probality' (typo).
        thinkplot.Show(title="test1",
                       xlabel='Event Count',
                       ylabel='Probability')
        print("Total: ", pmf.Total())

    return pmf
def PlotPriorDist(pmf):
    """Saves a plot of the CDF of the prior distribution of p_correct.

    pmf: prior
    """
    thinkplot.Clf()
    thinkplot.PrePlot(num=1)

    prior_cdf = thinkbayes.MakeCdfFromPmf(pmf, 'prior')
    thinkplot.Cdf(prior_cdf)

    output_formats = ['pdf', 'eps']
    thinkplot.Save(root='sat_prior',
                   xlabel='p_correct',
                   ylabel='CDF',
                   formats=output_formats)
def _test2(show=0):
    """Builds a binomial Pmf for known n and f and optionally displays it.

    show: if true, plot the Pmf and print its total probability
          (defaults to 0, matching _test1)

    Returns: the Pmf built by thinkbayes2.MakeBinomialPmf
    """
    # Given n and f (the probability that an event is recorded), find the
    # distribution of k.
    n = 150
    f = 0.1

    # MakeBinomialPmf: the binomial distribution over 0..n already
    # enumerates every possible outcome, so no normalization is needed.
    pmf = thinkbayes2.MakeBinomialPmf(n, f)

    if show:
        thinkplot.Clf()
        thinkplot.Pmf(pmf)
        # y label previously read 'Probality' (typo).
        thinkplot.Show(title="test2",
                       xlabel='Event Count',
                       ylabel='Probability')
        print("Total: ", pmf.Total())

    return pmf
def PlotPmfs(self, root='redline0'):
    """Plots the computed Pmfs.

    self.pmf_z and self.pmf_zb are passed through ScaleDists with a factor
    of 1/60 before plotting (presumably seconds to minutes, matching the
    x axis label -- confirm against ScaleDists).

    root: string
    """
    pmfs = ScaleDists([self.pmf_z, self.pmf_zb], 1.0 / 60)

    thinkplot.Clf()
    thinkplot.PrePlot(2)
    thinkplot.Pmfs(pmfs)

    # The curves drawn above are PMFs; the y axis used to be labelled 'CDF'.
    thinkplot.Save(root=root,
                   xlabel='Time (min)',
                   ylabel='PMF',
                   formats=FORMATS)
def CH6_5(diff1, diff2):
    """Shows the CDFs of the bid differences for the two showcases.

    diff1, diff2: sequences of bid differences

    Also prints CDF(0) for each sequence.
    """
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    cdf_first = thinkbayes.MakeCdfFromList(diff1, name='diff1')
    cdf_second = thinkbayes.MakeCdfFromList(diff2, name='diff2')
    both_cdfs = [cdf_first, cdf_second]

    thinkplot.Cdfs(both_cdfs)
    thinkplot.Show(xlabel='diff $', ylabel="CDF")

    # CDF(diff <= 0) indicates whether a contestant tends to underbid.
    print(cdf_first.Prob(0), cdf_second.Prob(0))
def CH7_5():
    """Odds of winning: plots the goal difference and prints win/loss/tie odds.

    The two goal distributions come from CH7_4(0); their difference is
    shown, then the probabilities that it is above, below and equal to
    zero are printed.
    """
    goals_first, goals_second = CH7_4(0)
    margin = goals_first - goals_second

    thinkplot.Clf()
    thinkplot.Pmf(margin)
    thinkplot.Show(title='diff',
                   xlabel='Goals per game',
                   ylabel='Probability')

    pwin = margin.ProbGreater(0)
    pmiss = margin.ProbLess(0)
    ptie = margin.Prob(0, default=0)

    summary = (pwin, pmiss, ptie)
    print("pwin = %.3f pmiss = %.3f ptie = %.3f" % summary)
def PlotOutliers(samples):
    """Make CDFs showing the distribution of outliers.

    samples: map from label to sequence of values; for each label only the
             values below 150 are kept
    """
    # .items() is available on both Python 2 and Python 3 mappings, whereas
    # .iteritems() raises AttributeError on Python 3.
    cdfs = [
        thinkbayes.MakeCdfFromList([x for x in sample if x < 150], label)
        for label, sample in samples.items()
    ]

    thinkplot.Clf()
    thinkplot.Cdfs(cdfs)
    thinkplot.Save(root='variability_cdfs',
                   title='CDF of height',
                   xlabel='Reported height (cm)',
                   ylabel='CDF')
def PlotSuites(suites, root):
    """Plots the Pmfs of a sequence of suites with a legend and saves them.

    suites: sequence of Suite objects
    root: string filename to write
    """
    thinkplot.Clf()
    thinkplot.PrePlot(len(suites))
    thinkplot.Pmfs(suites)

    save_options = dict(
        xlabel='Percentage of Active Female Users',
        ylabel='Probability',
        formats=['pdf', 'png'],
        legend=True,
    )
    thinkplot.Save(root=root, **save_options)
def sim_pearson(perfs, p1, p2, show=None):
    """Pearson correlation coefficient between the ratings of p1 and p2.

    r = cov(X, Y) / (sigma_X * sigma_Y), computed over the items that both
    p1 and p2 have rated.

    perfs: map from person to a map from item to numeric rating
    p1, p2: keys of perfs
    show: if true, also print the value computed by correlation.Corr and
          display a scatter plot of the shared ratings; when None (the
          default) a module-level `show` flag is used if one exists,
          otherwise nothing is shown

    Returns: the coefficient, or 0 when there are no shared items or when
             either set of shared ratings has zero variance
    """
    if show is None:
        # The body used to read an undeclared `show`.  Keep honouring a
        # module-level flag when there is one instead of raising NameError.
        show = globals().get('show', False)

    shared_items = [item for item in perfs[p1] if item in perfs[p2]]
    if not shared_items:
        return 0

    # float() keeps the divisions below exact under Python 2 as well.
    n = float(len(shared_items))

    # ratings that p1 and p2 have in common
    data_p1 = [perfs[p1][it] for it in shared_items]
    data_p2 = [perfs[p2][it] for it in shared_items]

    # mean rating of each person
    mu_p1 = sum(data_p1) / n
    mu_p2 = sum(data_p2) / n

    # (population) variance of each person's ratings
    var_p1 = sum((it - mu_p1) ** 2 for it in data_p1) / n
    var_p2 = sum((it - mu_p2) ** 2 for it in data_p2) / n
    if var_p1 == 0 or var_p2 == 0:
        # a constant rating vector has no defined correlation
        return 0

    # covariance of the two rating vectors
    cov = sum((x - mu_p1) * (y - mu_p2)
              for x, y in zip(data_p1, data_p2)) / n

    # Pearson correlation coefficient
    r = cov / sqrt(var_p1 * var_p2)

    if show:
        # cross-check against the thinkstats implementation and plot
        rr = correlation.Corr(data_p1, data_p2)
        print(r, rr)
        thinkplot.Clf()
        thinkplot.Scatter(data_p1, data_p2)
        thinkplot.Show()

    return r
def PlotCdfs(d, labels):
    """Plot CDFs for each sequence in a dictionary.

    For every sequence the mean of the raw values is computed first; the
    values are then jittered and that mean is subtracted before the CDF
    is built.

    d: map from key to sequence of values
    labels: map from key to string label
    """
    thinkplot.Clf()

    # .items() works on both Python 2 and Python 3; .iteritems() does not
    # exist on Python 3 dicts.
    for key, xs in d.items():
        mu = thinkstats.Mean(xs)
        jittered = thinkstats.Jitter(xs, 1.3)
        centered = [x - mu for x in jittered]

        cdf = thinkbayes.MakeCdfFromList(centered)
        thinkplot.Cdf(cdf, label=labels[key])

    thinkplot.Show()
def main():
    """Updates two Hockey suites with goal data and saves their goal Pmfs."""
    suite_a = Hockey()
    suite_b = Hockey()

    suite_a.UpdateSet([0, 2, 8, 4])
    suite_b.UpdateSet([0, 1, 2, 3])

    goals_a = MakeGoalPmf(suite_a)
    goals_b = MakeGoalPmf(suite_b)

    thinkplot.Clf()
    thinkplot.preplot(num=2)
    for goal_pmf in (goals_a, goals_b):
        thinkplot.Pmf(goal_pmf)

    thinkplot.Save(
        root='hockey_self5_MakeGoalPmf',
        xlabel='',
        ylabel='Probability',
        formats=['pdf'],
    )
def CalibrateDifficulty(self):
    """Saves a plot comparing the CDF of the raw scores with the model's CDF.

    The model curve comes from self.MakeRawScoreDist applied to a normal
    Pmf of efficacies.
    """
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    # observed raw scores
    data_cdf = thinkbayes2.Cdf(self.raw, label='data')
    thinkplot.Cdf(data_cdf)

    # raw scores predicted by the model
    efficacies = thinkbayes2.MakeNormalPmf(0, 1.5, 3)
    model_pmf = self.MakeRawScoreDist(efficacies)
    model_cdf = thinkbayes2.Cdf(model_pmf, label='model')
    thinkplot.Cdf(model_cdf)

    thinkplot.Save(
        root='sat_calibrate',
        xlabel='raw score',
        ylabel='CDF',
        formats=['pdf', 'eps'],
    )
def PlotPosteriors(self, other):
    """Saves a plot of the posterior CDFs of efficacy for two test takers.

    self, other: Sat objects; each one's score is used in its curve label.
    """
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    own_label = 'posterior %d' % self.score
    own_cdf = thinkbayes2.Cdf(self, label=own_label)

    other_label = 'posterior %d' % other.score
    other_cdf = thinkbayes2.Cdf(other, label=other_label)

    thinkplot.Cdfs([own_cdf, other_cdf])
    thinkplot.Save(
        xlabel='efficacy',
        ylabel='CDF',
        axis=[0, 4.6, 0.0, 1.0],
        root='sat_posteriors_eff',
        formats=['pdf', 'eps'],
    )
def PlotBuckets(self):
    """Plots the set of sequences that ended in a given bucket.

    Only bucket 23.0 is drawn.  Each bucket is drawn by self.PlotBucket
    in the colour paired with it, and the figure is saved as 'kidney5'.
    """
    # Buckets 7.0, 16.0 and 23.0 were annotated as 2.01, 4.95 cm and
    # 9.97 cm.  The full list used to be assigned and then immediately
    # overwritten, so that dead assignment is dropped.
    buckets = [23.0]

    # zip() stops at the shorter sequence, so surplus colours are ignored
    colors = ['blue', 'green', 'red', 'cyan']

    thinkplot.Clf()
    for bucket, color in zip(buckets, colors):
        self.PlotBucket(bucket, color)

    thinkplot.Save(root='kidney5',
                   formats=FORMATS,
                   title='History of simulated tumors',
                   axis=[-40, 1, MINSIZE, 12],
                   xlabel='years',
                   ylabel='diameter (cm, log scale)',
                   yscale='log')
def QQPlot(cdf, fit):
    """Saves a QQ plot of actual values against values from a fitted model.

    A reference line y = x is drawn first; then, for every cumulative
    probability in cdf, the model's value at that probability is plotted
    against the actual value.

    cdf: actual Cdf of RDT
    fit: model
    """
    diagonal = [-1.5, 5.5]

    thinkplot.Clf()
    thinkplot.Plot(diagonal, diagonal, 'b-')

    actual_values = cdf.xs
    model_values = [fit.Value(p) for p in cdf.ps]
    thinkplot.Plot(actual_values, model_values, 'gs')

    thinkplot.Save(
        root='kidney3',
        formats=FORMATS,
        xlabel='Actual',
        ylabel='Model',
    )
def PlotMarginals(suite):
    """Shows the CDFs of both marginals of a joint distribution side by side.

    suite: joint distribution of mu and sigma.
    """
    thinkplot.Clf()

    # marginal 0 goes in the left panel, marginal 1 in the right panel
    for index in (0, 1):
        pyplot.subplot(1, 2, index + 1)
        marginal_pmf = suite.Marginal(index)
        marginal_cdf = thinkbayes.MakeCdfFromPmf(marginal_pmf)
        thinkplot.Cdf(marginal_cdf)

    thinkplot.Show()
def MakePlot(self, root='redline1'):
    """Saves a plot of the prior and posterior CDFs of the arrival rate.

    root: string
    """
    thinkplot.Clf()
    thinkplot.PrePlot(2)

    # convert units to passengers per minute
    per_minute = 60
    prior_cdf = self.prior_lam.MakeCdf().Scale(per_minute)
    posterior_cdf = self.post_lam.MakeCdf().Scale(per_minute)

    thinkplot.Cdfs([prior_cdf, posterior_cdf])
    thinkplot.Save(
        root=root,
        xlabel='Arrival rate (passengers / min)',
        ylabel='CDF',
        formats=FORMATS,
    )
def ComparePriors():
    """Plots the posteriors obtained from two different priors on one figure."""
    dataset = [60]
    high = 1000

    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    # each entry pairs a suite constructor with its legend label
    variants = [(Train, 'uniform'), (Train2, 'power law')]
    for constructor, label in variants:
        posterior = MakePosterior(high, dataset, constructor)
        posterior.name = label
        thinkplot.Pmf(posterior)

    thinkplot.Save(
        root='train4',
        xlabel='Number of trains',
        ylabel='Probability',
    )
def MakePlot(self, root='redline3'):
    """Saves a plot of the CDFs of prior_x, post_x and pmf_y.

    root: string
    """
    # observed gaps
    unscaled = [
        self.prior_x.MakeCdf(),
        self.post_x.MakeCdf(),
        self.pmf_y.MakeCdf(),
    ]
    cdfs = ScaleDists(unscaled, 1.0 / 60)

    thinkplot.Clf()
    thinkplot.PrePlot(3)
    thinkplot.Cdfs(cdfs)
    thinkplot.Save(
        root=root,
        xlabel='Time (min)',
        ylabel='CDF',
        formats=FORMATS,
    )
def PlotConditionalCdfs(self):
    """Saves a plot of the conditional CDF of age for each of four buckets."""
    # buckets 7.0, 16.0, 23.0, 27.0 were annotated as
    # 2.01, 4.95 cm, 9.97 cm, 14.879 cm
    labelled_buckets = [
        (7.0, '2 cm'),
        (16.0, '5 cm'),
        (23.0, '10 cm'),
        (27.0, '15 cm'),
    ]

    cdfs = [self.cache.ConditionalCdf(bucket, name)
            for bucket, name in labelled_buckets]

    thinkplot.Clf()
    thinkplot.PrePlot(num=len(cdfs))
    thinkplot.Cdfs(cdfs)
    thinkplot.Save(
        root='kidney6',
        title='Distribution of age for several diameters',
        formats=FORMATS,
        xlabel='tumor age (years)',
        ylabel='CDF',
        loc=4,
    )
def PlotExpectedGains(guess1=20000, guess2=40000, path='.', save=True):
    """Plots expected gains as a function of bid, for both players.

    Along the way it draws the plots made by MakePlots and PlotBeliefs and
    prints summary statistics of each player's prior and posterior.

    guess1: player1's estimate of the price of showcase 1
    guess2: player2's estimate of the price of showcase 2
    path: passed to MakePlayers
    save: if true, the expected-gain figure is saved as 'price5'
    """
    player1, player2 = MakePlayers(path)
    MakePlots(player1, player2)

    player1.MakeBeliefs(guess1)
    player2.MakeBeliefs(guess2)

    print('Player 1 prior mle', player1.prior.MaximumLikelihood())
    print('Player 2 prior mle', player2.prior.MaximumLikelihood())
    print('Player 1 mean', player1.posterior.Mean())
    print('Player 2 mean', player2.posterior.Mean())
    print('Player 1 mle', player1.posterior.MaximumLikelihood())
    print('Player 2 mle', player2.posterior.MaximumLikelihood())

    player1.PlotBeliefs('price3')
    player2.PlotBeliefs('price4')

    # each calculator takes the player of interest first, the opponent second
    curves = [
        (GainCalculator(player1, player2), 'Player 1', 'Player 1 optimal bid'),
        (GainCalculator(player2, player1), 'Player 2', 'Player 2 optimal bid'),
    ]

    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    for calculator, label, message in curves:
        bids, gains = calculator.ExpectedGains()
        thinkplot.Plot(bids, gains, label=label)
        # max over (gain, bid) pairs selects the pair with the largest gain
        print(message, max(zip(gains, bids)))

    if not save:
        return

    thinkplot.Save(
        root='price5',
        xlabel='bid ($)',
        ylabel='expected gain ($)',
        formats=FORMATS,
    )
def MakePlot(self, root='redline2'):
    """Prints the means of pmf_z, pmf_zb and pmf_y and saves a plot of their CDFs.

    root: string
    """
    # means are divided by 60 before printing
    print('Mean z', self.pmf_z.Mean() / 60)
    print('Mean zb', self.pmf_zb.Mean() / 60)
    print('Mean y', self.pmf_y.Mean() / 60)

    unscaled = [
        self.pmf_z.MakeCdf(),
        self.pmf_zb.MakeCdf(),
        self.pmf_y.MakeCdf(),
    ]
    cdfs = ScaleDists(unscaled, 1.0 / 60)

    thinkplot.Clf()
    thinkplot.PrePlot(3)
    thinkplot.Cdfs(cdfs)
    thinkplot.Save(
        root=root,
        xlabel='Time (min)',
        ylabel='CDF',
        formats=FORMATS,
    )
def main():
    """Compares priors, then computes posteriors for several upper bounds.

    Prints the posterior mean for each upper bound, saves figure 'train3',
    and prints the 5th-95th percentile interval of the last posterior
    twice: once via Percentile and once via its Cdf.
    """
    ComparePriors()

    observations = [30, 60, 90]
    upper_bounds = [500, 1000, 2000]

    thinkplot.Clf()
    thinkplot.PrePlot(num=3)

    for high in upper_bounds:
        suite = MakePosterior(high, observations, Train2)
        print(high, suite.Mean())

    thinkplot.Save(
        root='train3',
        xlabel='Number of trains',
        ylabel='Probability',
    )

    # `suite` is the posterior from the final upper bound
    lower = Percentile(suite, 5)
    upper = Percentile(suite, 95)
    print((lower, upper))

    cdf = thinkbayes.MakeCdfFromPmf(suite)
    lower = cdf.Percentile(5)
    upper = cdf.Percentile(95)
    print((lower, upper))
def CH6_2(price1, price2):
    """Shows the estimated price distributions of the two showcases.

    price1, price2: sequences of prices
    """
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    # The price values never repeat, so a PMF built directly from the
    # lists would show nothing useful; estimated PDFs are plotted instead.
    price_max = max(max(price1), max(price2))
    xs = numpy.linspace(0, price_max + 100, num=150)

    pdfs = [thinkbayes.EstimatedPdf(prices) for prices in (price1, price2)]
    names = ('showcase1', 'showcase2')
    pmfs = [pdf.MakePmf(xs, name=name) for pdf, name in zip(pdfs, names)]

    thinkplot.Pmfs(pmfs)
    thinkplot.Show(xlabel='price $', ylabel='PMF')
def MakePlot(self, root='redline4'):
    """Saves a plot of every component CDF in the MetaPmf plus the mixture CDF.

    root: string
    """
    thinkplot.Clf()

    # plot the MetaPmf: one faint line per component distribution
    component_style = dict(alpha=0.2, color='blue', label='')
    for component, prob in self.metapmf.Items():
        component_cdf = component.MakeCdf().Scale(1.0/60)
        # line width is derived from the component's probability
        # NOTE(review): math.log(-math.log(prob)) is 0 at prob == 1/e and
        # undefined for prob >= 1 -- presumably probs are small; confirm.
        width = 2/math.log(-math.log(prob))
        thinkplot.Plot(component_cdf.xs, component_cdf.ps,
                       linewidth=width, **component_style)

    # plot the mixture
    thinkplot.PrePlot(2)
    mixture_cdf = self.mixture.MakeCdf(name='mix').Scale(1.0/60)
    thinkplot.Cdf(mixture_cdf)

    thinkplot.Save(
        root=root,
        xlabel='Wait time (min)',
        ylabel='CDF',
        formats=FORMATS,
        axis=[0,10,0,1],
    )