def _test2(show):
    """Build the binomial PMF of the event count k for fixed n and f.

    Given a known number of trials ``n`` and a per-trial probability ``f``
    of an event being recorded (both hard-coded below), compute the
    distribution of the number of recorded events.

    show: if truthy, plot the PMF and print its total probability.

    returns: the Pmf returned by thinkbayes2.MakeBinomialPmf.
    """
    n = 150
    f = 0.1
    # Author's note: the binomial PMF already enumerates every outcome
    # from 0 to n, so no extra normalization step is needed.
    pmf = thinkbayes2.MakeBinomialPmf(n, f)
    if show:
        thinkplot.Clf()
        thinkplot.Pmf(pmf)
        # Axis label spelling corrected ('Probality' -> 'Probability').
        thinkplot.Show(title="test2",
                       xlabel='Event Count',
                       ylabel='Probability')
        print("Total: ", pmf.Total())
    return pmf
def main():
    """Update a Train suite with one observation and report the posterior.

    Hypotheses are the integers 100..1000; the single datum is 321.
    Prints the posterior mean, maximum-likelihood value and 90% credible
    interval, then shows the posterior PMF.
    """
    posterior = Train(range(100, 1001))
    posterior.Update(321)

    print('Posterior mean', posterior.Mean())
    print('Posterior MLE', posterior.MaximumLikelihood())
    print('Posterior CI 90', posterior.CredibleInterval(90))

    thinkplot.PrePlot(1)
    thinkplot.Pmf(posterior)
    show_options = dict(xlabel='Number of trains',
                        ylabel='Probability',
                        legend=False)
    thinkplot.Show(**show_options)
def main():
    """Update a Train suite with observation 60 and save the posterior plot.

    Hypotheses are the integers 1..1000. Prints the posterior mean and
    writes the PMF plot to files rooted at 'train1' (pdf and eps).
    """
    # range() instead of the Python 2-only xrange(): the hypotheses are
    # identical and the code now also runs on Python 3.
    hypos = range(1, 1001)
    suite = Train(hypos)
    suite.label = 'train label'
    suite.Update(60)

    # Single-argument print() produces the same output on Python 2 and 3.
    print(suite.Mean())

    thinkplot.PrePlot(1)
    thinkplot.Pmf(suite)
    thinkplot.Save(root='train1',
                   xlabel='Number of trains',
                   ylabel='Probability',
                   formats=['pdf', 'eps'])
def main():
    """Plot a Hockey suite before and after updating it with five scores.

    Saves the prior to 'hockey_self2_prior.pdf', prints the hypotheses and
    the likelihood of observing 2 under each of them, updates with the
    data [0, 2, 4, 3, 8], and saves the posterior to
    'hockey_self2_posterior.pdf'.
    """
    # print() with one argument behaves the same on Python 2 and 3; the
    # original used the Python 2-only statement form here.
    print("1")

    hockey1 = Hockey()
    thinkplot.PrePlot(1)
    thinkplot.Pmf(hockey1)
    thinkplot.Save(root='hockey_self2_prior',
                   xlabel='',
                   ylabel='Probability',
                   formats=['pdf'])

    print(hockey1.Values())
    for hypo in hockey1.Values():
        print(hockey1.Likelihood(2, hypo))

    hockey1.UpdateSet([0, 2, 4, 3, 8])
    thinkplot.Pmf(hockey1)
    thinkplot.Save(root='hockey_self2_posterior',
                   xlabel='',
                   ylabel='Probability',
                   formats=['pdf'])
    print("No error, everything worked fine")
def main():
    """Show a Train posterior after one sighting and again after six more.

    Hypotheses are the integers 100..1000. The suite is updated with 50
    and plotted, then updated with 13, 45, 89, 22, 33 and 35 and plotted
    again; finally the mean, maximum-likelihood value and 90% credible
    interval are printed.
    """
    posterior = Train(range(100, 1001))

    def show_posterior():
        # Same plot configuration is used for both snapshots.
        thinkplot.PrePlot(1)
        thinkplot.Pmf(posterior)
        thinkplot.Show(xlabel='Number of trains',
                       ylabel='Probability',
                       legend=False)

    posterior.Update(50)
    show_posterior()

    for observed in (13, 45, 89, 22, 33, 35):
        posterior.Update(observed)
    show_posterior()

    print(posterior.Mean())
    print(posterior.MaximumLikelihood())
    print(posterior.CredibleInterval(90))
def CH7_5():
    """Odds of winning.

    Takes the two distributions returned by CH7_4(0), plots their
    difference, and prints the probability that the difference is
    positive, negative, or exactly zero.
    """
    first, second = CH7_4(0)
    goal_diff = first - second

    thinkplot.Clf()
    thinkplot.Pmf(goal_diff)
    thinkplot.Show(title='diff',
                   xlabel='Goals per game',
                   ylabel='Probability')

    outcome_probs = (
        goal_diff.ProbGreater(0),
        goal_diff.ProbLess(0),
        goal_diff.Prob(0, default=0),
    )
    print("pwin = %.3f pmiss = %.3f ptie = %.3f" % outcome_probs)
def CH5_6():
    """Mixture distribution: pool the contributions of several dice.

    Number of dice   Sides
    5                4-sides
    4                6-sides
    3                8-sides
    2                12-sides
    1                20-sides

    The mixture is computed twice, once by hand and once with
    thinkbayes.MakeMixture, and both results are plotted.
    """
    # (sides, number of dice of that kind)
    box = ((4, 5), (6, 4), (8, 3), (12, 2), (20, 1))

    thinkplot.PrePlot(num=2)

    # Method 1: accumulate weight * probability for every outcome.
    weighted = [(count, Die(sides)) for sides, count in box]
    manual_mix = thinkbayes.Pmf()
    for count, die in weighted:
        for outcome, prob in die.Items():
            manual_mix.Incr(outcome, count * prob)
    manual_mix.Normalize()
    manual_mix.name = 'mix-1'
    thinkplot.Pmf(manual_mix)

    # Method 2: build a meta-Pmf of dice and let the library mix it.
    meta = thinkbayes.Pmf()
    for sides, count in box:
        meta.Set(Die(sides), y=count)
    meta.Normalize()
    library_mix = thinkbayes.MakeMixture(meta, name='mix-2')
    library_mix.name = 'mix-2'
    thinkplot.Pmf(library_mix)

    thinkplot.Show()
def main():
    """Plot the outcome mixture of a weighted set of dice.

    The set holds a 6-, 8-, 12- and 20-sided die with weights 2, 3, 1
    and 1. The mixture is normalized and saved as 'dice_Mix_self3.pdf'.
    """
    weighted_dice = Pmf()
    for sides, weight in ((6, 2), (8, 3), (12, 1), (20, 1)):
        weighted_dice.Set(Die(sides), weight)

    mixture = Pmf()
    for die, weight in weighted_dice.Items():
        for outcome, prob in die.Items():
            mixture.Incr(outcome, weight * prob)
    # Weights above are raw counts, so normalization happens once here.
    mixture.Normalize()

    thinkplot.PrePlot(1)
    thinkplot.Pmf(mixture)
    thinkplot.Save(root='dice_Mix_self3',
                   xlabel='',
                   ylabel='Probability',
                   formats=['pdf'])
def MakePmfPlot(alpha=10):
    """Plots the Pmf of location for several values of beta.

    alpha: value passed through to MakeLocationPmf

    One curve is drawn for each beta in (10, 20, 40) over locations
    0..30, and the figure is saved with root 'paintball1'.
    """
    beta_values = [10, 20, 40]
    grid = range(0, 31)

    thinkplot.PrePlot(num=len(beta_values))
    for b in beta_values:
        location_pmf = MakeLocationPmf(alpha, b, grid)
        location_pmf.name = 'beta = %d' % b
        thinkplot.Pmf(location_pmf)

    thinkplot.Save('paintball1',
                   xlabel='Distance',
                   ylabel='Prob',
                   formats=FORMATS)
def main():
    """Estimate a decay parameter from six observations and plot the posterior.

    Hypotheses are 1001 evenly spaced values from 0.001 to 1.5. The Decay
    suite is updated with the data, the posterior mean is printed, and
    the posterior PMF is shown.
    """
    low = 0.001
    high = 1.5
    steps = 1001
    # Dividing by a float keeps this true division on Python 2 as well.
    hypos = [low + (high - low) * i / (steps - 1.0) for i in range(steps)]

    suite = Decay(hypos)
    data = [1.5, 2, 3, 4, 5, 12]
    suite.UpdateSet(data)

    # The original Python 2 print statement is a SyntaxError on Python 3.
    # A single pre-formatted argument prints identically on both.
    print('Mean of the posterior distribution: %s' % (suite.Mean(),))

    # plot the posterior distribution
    thinkplot.Pmf(suite)
    thinkplot.Show(title='Decay parameter',
                   xlabel='Parameter (inverse cm)',
                   ylabel='Posterior probability')
def PlotSurvivalCurve(ts, lams, ss):
    """Plots a rescaled hazard function together with a survival curve.

    ts: times in years
    lams: Pmf representing the hazard function (rescaled in place)
    ss: list of values for the survival curve

    The figure is saved with root 'seer1'.
    """
    # Rescale the hazard so that its largest value becomes 1.
    # NOTE: this modifies the caller's ``lams`` object.
    peak = max(lams.Probs())
    lams.MultAll(1 / peak)

    hazard_style = dict(linewidth=2, linestyle='dashed', color='0.7')
    thinkplot.Pmf(lams, **hazard_style)
    thinkplot.Plot(ts, ss, linewidth=2, color='blue', label='survival')

    thinkplot.Save(root='seer1',
                   title='',
                   xlabel='Survival time (years)',
                   ylabel='Probability')
def PredRemaining(self, rem_time, score):
    """Plots the predictive distribution for final number of goals.

    rem_time: remaining time in the game in minutes
    score: number of goals already scored

    The previous version referenced an undefined name (``lt``) and built
    the Poisson from the rate of whichever hypothesis happened to be
    iterated last. This version mixes one Poisson per hypothesis,
    weighted by that hypothesis's posterior probability.
    """
    metapmf = thinkbayes2.Pmf()
    for lam, prob in self.Items():
        # lam is a goals-per-game rate; the original scales it by
        # rem_time / 90 to get expected goals in the remaining time.
        lam_remaining = lam * rem_time / 90
        # 12 is the upper bound on goals, as in the original call.
        remaining_pmf = thinkbayes2.MakePoissonPmf(lam_remaining, 12)
        metapmf.Set(remaining_pmf, prob)

    pmf = thinkbayes2.MakeMixture(metapmf)
    # Shift by the goals already scored to get the final total.
    pmf += score

    thinkplot.Pmf(pmf)
    thinkplot.Show()
def PlotSurvival(durations):
    """Plots the CDF, survival curve and hazard function of durations.

    durations: list of durations
    """
    duration_cdf = thinkstats2.MakeCdfFromList(durations)
    # The CDF is drawn faintly, before the two-curve palette is set up.
    thinkplot.Cdf(duration_cdf, alpha=0.1)

    thinkplot.PrePlot(2)
    times, survival = SurvivalFunction(duration_cdf)
    thinkplot.Plot(times, survival, label="S(t)")

    hazard = HazardFunction(times, survival)
    thinkplot.Pmf(hazard, label='lam(t)')

    thinkplot.Show(xlabel='t (weeks)')
def MakeConditionalPlot(suite):
    """Plots distributions of alpha conditioned on several betas.

    suite: posterior joint distribution of location

    For each beta in (10, 20, 40) the conditional distribution
    suite.Conditional(0, 1, beta) is drawn; the figure is saved with
    root 'paintball3'.
    """
    beta_values = [10, 20, 40]
    thinkplot.PrePlot(num=len(beta_values))

    for b in beta_values:
        conditional = suite.Conditional(0, 1, b)
        conditional.name = 'beta = %d' % b
        thinkplot.Pmf(conditional)

    thinkplot.Save('paintball3',
                   xlabel='Distance',
                   ylabel='Prob',
                   formats=FORMATS)
def main():
    """Estimate a density for the first price column and save its plot.

    Reads the data rows, transposes them into six columns, fits
    thinkbayes.EstimatedPdf to the first column, discretizes it on 101
    points between 0 and 75000, and saves the plot as 'price_self2.pdf'.
    """
    rows = ReadData()
    # Transpose rows into columns; exactly six columns are expected.
    price1, price2, bid1, bid2, diff1, diff2 = zip(*rows)

    price_pdf = thinkbayes.EstimatedPdf(price1)

    grid = numpy.linspace(0, 75000, 101)
    price_pmf = price_pdf.MakePmf(grid)

    thinkplot.PrePlot(1)
    thinkplot.Pmf(price_pmf)
    thinkplot.Save(root='price_self2',
                   xlabel='',
                   ylabel='Probability_density',
                   formats=['pdf'])
def ComparePriors():
    """Runs the same data through two priors and plots both posteriors.

    Uses the dataset [60] with an upper bound of 1000, once with Train
    (labelled 'uniform') and once with Train2 (labelled 'power law').
    The figure is saved with root 'train4'.
    """
    observations = [60]
    upper = 1000
    variants = ((Train, 'uniform'), (Train2, 'power law'))

    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    for make_suite, caption in variants:
        posterior = MakePosterior(upper, observations, make_suite)
        posterior.name = caption
        thinkplot.Pmf(posterior)

    thinkplot.Save(root='train4',
                   xlabel='Number of trains',
                   ylabel='Probability')
def main():
    """Plot an equal-weight mixture of five dice.

    Outcome probabilities of 6-, 8-, 12-, 16- and 20-sided dice are
    summed per outcome and normalized; the result is saved as
    'dice_Mix_self1.pdf'.
    """
    dice = [Die(sides) for sides in (6, 8, 12, 16, 20)]

    mixture = Pmf()
    for die in dice:
        for outcome, prob in die.Items():
            mixture.Incr(outcome, prob)
    mixture.Normalize()

    thinkplot.PrePlot(1)
    thinkplot.Pmf(mixture)
    thinkplot.Save(root='dice_Mix_self1',
                   xlabel='sum of dice',
                   ylabel='Probability',
                   formats=['pdf'])
def MakeHists(live):
    """Plots the mother's-age histogram as bars and as a line, side by side.

    live: DataFrame with an ``agepreg`` column

    Ages are floored to whole years before counting. The figure is saved
    with root 'probability_agepreg_hist'.
    """
    whole_years = np.floor(live.agepreg)
    age_hist = thinkstats2.Hist(whole_years, label='agepreg')
    bounds = [0, 45, 0, 700]

    thinkplot.PrePlot(2, cols=2)

    # Left panel: bar histogram.
    thinkplot.SubPlot(1)
    thinkplot.Hist(age_hist)
    thinkplot.Config(xlabel='years', ylabel='frequency', axis=bounds)

    # Right panel: the same counts drawn with thinkplot.Pmf.
    thinkplot.SubPlot(2)
    thinkplot.Pmf(age_hist)
    thinkplot.Save(root='probability_agepreg_hist',
                   xlabel='years',
                   axis=[0, 45, 0, 700])
def main():
    """Sample the sum of three six-sided dice and save the resulting PMF.

    Prints some debugging information about the die objects, draws 50
    samples of the sum with SampleSum, and saves the plot as
    'dice_self2.pdf'.
    """
    d6 = Die(6)
    # The list holds three references to the same Die object.
    dice = [d6] * 3

    # Debug output. The original used Python 2 print statements, which
    # are a SyntaxError on Python 3; single-argument print() prints the
    # same text on both.
    print(type(d6))
    print(d6.Items())
    print(type(dice))
    for die in dice:
        print(die.Items())

    test = SampleSum(dice, 50)

    thinkplot.PrePlot(1)
    thinkplot.Pmf(test)
    thinkplot.Save(root='dice_self2',
                   xlabel='sum of dice',
                   ylabel='Probability',
                   formats=['pdf'])
def main():
    """Build a PMF over family sizes 1..7, print its moments and plot it.

    Every size from 1 to 7 is given the same weight of 1.
    """
    d = {
        1: 1,
        2: 1,
        3: 1,
        4: 1,
        5: 1,
        6: 1,
        7: 1,
    }

    # form the pmf
    pmf = thinkstats2.MakePmfFromDict(d, 'family size')

    # The original Python 2 print statements do not parse on Python 3.
    # One pre-formatted argument prints identically on both versions.
    print('mean %s' % (pmf.Mean(),))
    print('var %s' % (pmf.Var(),))

    # plot the Pmfs
    thinkplot.Pmf(pmf)
    thinkplot.Show(xlabel='Family size', ylabel='PMF')
def main():
    """Update a Euro suite with 140 heads and 110 tails and plot it.

    Hypotheses are the integers 0..100. After all 250 updates the
    probability of hypothesis 80 is printed and the posterior is saved
    as 'coin_self1.pdf'.
    """
    # range() replaces the Python 2-only xrange(); the hypotheses are
    # the same.
    pmf = Euro(range(0, 101))

    dataset = 'H' * 140 + "T" * 110
    for data in dataset:
        pmf.Update(data)

    # Single-argument print() works on both Python 2 and Python 3.
    print(pmf.Prob(80))

    thinkplot.PrePlot(1)
    thinkplot.Pmf(pmf)
    thinkplot.Save(root='coin_self1',
                   xlabel='',
                   ylabel='Probability',
                   formats=['pdf'])
def main():
    """Update a Train suite with train number 60 and save the posterior.

    Hypotheses run from 1 (the company has one train) to 1000 (the
    company has 1000 trains). Prints the posterior mean and writes the
    plot to 'trains.pdf'.
    """
    posterior = Train(range(1, 1001))
    posterior.label = "Posterior Probability"

    # Incorporate the single observation: train #60 was seen.
    posterior.Update(60)

    mean_hypothesis = posterior.Mean()
    print("Mean hypothesis: {}".format(mean_hypothesis))

    # Plot with Allen Downey's thinkplot module.
    thinkplot.PrePlot(1)
    thinkplot.Pmf(posterior)
    thinkplot.Save(root='trains',
                   xlabel='Number of trains',
                   ylabel='Probability',
                   formats=['pdf'])
def main():
    '''Build a learning-styles distribution and inspect one likelihood.

    Creates a StyleDist over hypotheses 0..100, prints the likelihood of
    the data (2, 0) under the 50% hypothesis, and plots the distribution
    as a histogram and as a PMF. A second StyleDist with hypothesis 65
    removed is built as the alternative p(D|~H) but is not used further
    here.

    returns: the StyleDist over 0..100
    '''
    observed = (2, 0)
    hypothesis = 50
    reference_ratio = 65

    dist = StyleDist(range(0, 101))
    likelihood = dist.Likelihood(observed, hypothesis)
    print('p(D|50%)', likelihood)
    thinkplot.Hist(dist)

    # set p(D|~H): same hypotheses minus the reference ratio
    alternative = StyleDist(range(0, 101))
    alternative.Remove(reference_ratio)
    alternative.Normalize()

    thinkplot.Pmf(dist)
    return dist
def main():
    """Fit the Lincoln suite on a 31-point grid and report all marginals.

    Hypotheses are triples (n, p1, p2) with n in 32..349 and p1, p2 on a
    31-point grid over [0, 1]. After updating with the data (20, 15, 3),
    the marginal of n is saved with root 'lincoln1', the marginals of p1
    and p2 are shown, and posterior means, MAP values and the two
    comparisons between p1 and p2 are printed.
    """
    observed = 20, 15, 3
    grid = numpy.linspace(0, 1, 31)

    # Same enumeration order as three nested loops: n, then p1, then p2.
    triples = [(n, p1, p2)
               for n in range(32, 350)
               for p1 in grid
               for p2 in grid]

    posterior = Lincoln(triples)
    posterior.Update(observed)

    bugs = posterior.Marginal(0)
    thinkplot.Pmf(bugs, label='n')
    thinkplot.Save(root='lincoln1',
                   xlabel='number of bugs',
                   ylabel='PMF',
                   formats=['pdf', 'png'])
    print('post mean n', bugs.Mean())
    print('MAP n', bugs.MaximumLikelihood())

    first = posterior.Marginal(1, label='p1')
    second = posterior.Marginal(2, label='p2')
    thinkplot.Pdf(first)
    thinkplot.Pdf(second)
    thinkplot.Show()

    print('post mean p1', first.Mean())
    print('MAP p1', first.MaximumLikelihood())
    print('post mean p2', second.Mean())
    print('MAP p2', second.MaximumLikelihood())
    print('p1 > p2', first > second)
    print('p1 < p2', first < second)
def main():
    """Fit the Lincoln suite on a 101-point grid and report the n marginal.

    Hypotheses are triples (n, p1, p2) with n in 32..349 and p1, p2 on a
    101-point grid over [0, 1]. After updating with the data (20, 15, 3),
    the marginal of n is saved with root 'lincoln1' and its mean and
    maximum-likelihood value are printed.
    """
    observed = 20, 15, 3
    grid = numpy.linspace(0, 1, 101)

    # Same enumeration order as three nested loops: n, then p1, then p2.
    triples = [(n, p1, p2)
               for n in range(32, 350)
               for p1 in grid
               for p2 in grid]

    posterior = Lincoln(triples)
    posterior.Update(observed)

    bugs = posterior.Marginal(0)
    thinkplot.Pmf(bugs, label='n')
    thinkplot.Save(root='lincoln1',
                   xlabel='number of bugs',
                   ylabel='PMF',
                   formats=['pdf', 'png'])

    print(bugs.Mean())
    print(bugs.MaximumLikelihood())
def main():
    """Produce the three 'dungeons' figures.

    dungeons3: outcome mixture of a box of dice (5 d4, 4 d6, 3 d8,
               2 d12, 1 d20).
    dungeons1: sum of three d6, from 1000 samples versus the exact sum.
    dungeons2: distribution of the maximum of six exact three-d6 sums.
    """
    pmf_dice = thinkbayes.Pmf()
    pmf_dice.Set(Die(4), 5)
    pmf_dice.Set(Die(6), 4)
    pmf_dice.Set(Die(8), 3)
    pmf_dice.Set(Die(12), 2)
    pmf_dice.Set(Die(20), 1)
    pmf_dice.Normalize()

    # The original also accumulated this mixture by hand and then
    # overwrote the result with MakeMixture; that dead loop is removed.
    mix = thinkbayes.MakeMixture(pmf_dice)

    colors = thinkplot.Brewer.Colors()
    thinkplot.Hist(mix, width=0.9, color=colors[4])
    thinkplot.Save(root='dungeons3',
                   xlabel='Outcome',
                   ylabel='Probability',
                   formats=FORMATS)

    # Fixed seed so the sampled curve is reproducible.
    random.seed(17)

    d6 = Die(6, 'd6')
    dice = [d6] * 3
    three = thinkbayes.SampleSum(dice, 1000)
    three.name = 'sample'
    three.Print()

    three_exact = d6 + d6 + d6
    three_exact.name = 'exact'
    three_exact.Print()

    thinkplot.PrePlot(num=2)
    thinkplot.Pmf(three)
    thinkplot.Pmf(three_exact, linestyle='dashed')
    thinkplot.Save(root='dungeons1',
                   xlabel='Sum of three d6',
                   ylabel='Probability',
                   axis=[2, 19, 0, 0.15],
                   formats=FORMATS)

    thinkplot.Clf()
    thinkplot.PrePlot(num=1)

    # Distribution of the best attribute via Max(6). The original kept a
    # commented-out alternative that chained PmfMax calls instead.
    best_attr_cdf = three_exact.Max(6)
    best_attr_cdf.name = ''
    best_attr_pmf = thinkbayes.MakePmfFromCdf(best_attr_cdf)
    best_attr_pmf.Print()

    thinkplot.Pmf(best_attr_pmf)
    thinkplot.Save(root='dungeons2',
                   xlabel='Sum of three d6',
                   ylabel='Probability',
                   axis=[2, 19, 0, 0.23],
                   formats=FORMATS)
# In[53]:
# Side-by-side bar histograms of the two revenue PMFs.
width = 200000
axis = [0, 800, 0, 0.0005]
thinkplot.PrePlot(2, cols=2)
for _pmf, _align in ((flfindistdfpmf, 'right'), (vfindistdfpmf, 'left')):
    thinkplot.Hist(_pmf, align=_align, width=width)
thinkplot.Config(xlabel='Total Revenue', ylabel='PMF')

# In[54]:
# The same two PMFs drawn with thinkplot.Pmf.
for _pmf in (flfindistdfpmf, vfindistdfpmf):
    thinkplot.Pmf(_pmf)

# In[55]:
# Both PMFs in a single call.
thinkplot.PrePlot(2)
thinkplot.subplot(2)
# axis = [0, 800, 0, 0.0005]
thinkplot.Pmfs([flfindistdfpmf, vfindistdfpmf])
thinkplot.Show(xlabel='Total Revenue', ylabel='PMF')

# # Let's plot the PMF of the log-transformed columns
# Load the BRFSS data and keep the non-missing heights of rows with sex == 2.
df = brfss.ReadBrfss(nrows=None)
female = df[df.sex == 2]
female_heights = female.htm3.dropna()

## female height statistics
mean = female_heights.mean()
std = female_heights.std()
print('mean:\n', mean)
print('std:\n', std)

## make pdf representing female distribution
pdf = thinkstats2.NormalPdf(mean, std)
pmf = pdf.MakePmf()

thinkplot.PrePlot(2)
thinkplot.Pdf(pdf, label='normal pdf')
thinkplot.Pmf(pmf, label='normal pmf')
thinkplot.Show(xlabel='x', xlim=[140, 186])

## KDE of normal pdf
# Overlay the model with kernel density estimates from i random samples
# of 500 values each.
i = 6
thinkplot.PrePlot(i + 1)
thinkplot.Pdf(pdf, label='normal')
for _ in range(i):
    sample = np.random.normal(mean, std, 500)
    sample_pdf = thinkstats2.EstimatedPdf(sample, label='sample')
    thinkplot.Pdf(sample_pdf, label='sample KDE')
thinkplot.Show(xlabel='x', ylabel='PDF', xlim=[140, 186])

## calculate moments
def main():
    """Compare two hockey teams and estimate the chance of winning a series.

    Saves four figures (hockey0..hockey3): the two priors, the two
    posteriors, the predicted goals per game, and the predicted time
    until the next goal. Then prints the win/loss/tie probabilities, the
    overtime adjustment, and the probability that the first team wins
    the series.
    """
    formats = ['pdf', 'eps']

    suite1 = Hockey('bruins')
    suite2 = Hockey('canucks')

    # Priors.
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)
    thinkplot.Pmf(suite1)
    thinkplot.Pmf(suite2)
    thinkplot.Save(root='hockey0',
                   xlabel='Goals per game',
                   ylabel='Probability',
                   formats=formats)

    suite1.UpdateSet([0, 2, 8, 4])
    suite2.UpdateSet([1, 3, 1, 0])

    # Posteriors.
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)
    thinkplot.Pmf(suite1)
    thinkplot.Pmf(suite2)
    thinkplot.Save(root='hockey1',
                   xlabel='Goals per game',
                   ylabel='Probability',
                   formats=formats)

    goal_dist1 = MakeGoalPmf(suite1)
    goal_dist2 = MakeGoalPmf(suite2)

    thinkplot.Clf()
    thinkplot.PrePlot(num=2)
    thinkplot.Pmf(goal_dist1)
    thinkplot.Pmf(goal_dist2)
    thinkplot.Save(root='hockey2',
                   xlabel='Goals',
                   ylabel='Probability',
                   formats=formats)

    time_dist1 = MakeGoalTimePmf(suite1)
    time_dist2 = MakeGoalTimePmf(suite2)

    print('MLE bruins', suite1.MaximumLikelihood())
    print('MLE canucks', suite2.MaximumLikelihood())

    thinkplot.Clf()
    thinkplot.PrePlot(num=2)
    thinkplot.Pmf(time_dist1)
    thinkplot.Pmf(time_dist2)
    thinkplot.Save(root='hockey3',
                   xlabel='Games until goal',
                   ylabel='Probability',
                   formats=formats)

    diff = goal_dist1 - goal_dist2
    p_win = diff.ProbGreater(0)
    p_loss = diff.ProbLess(0)
    # Fixed: the Pmf accessor is ``Prob``; the original called the
    # lower-case ``prob``.
    p_tie = diff.Prob(0)
    print(p_win, p_loss, p_tie)

    # Overtime: the first team wins if its time-to-goal is shorter;
    # exact ties in that time are split evenly.
    p_overtime = thinkbayes2.PmfProbLess(time_dist1, time_dist2)
    p_adjust = thinkbayes2.PmfProbEqual(time_dist1, time_dist2)
    p_overtime += p_adjust / 2
    print('p_overtime', p_overtime)

    print(p_overtime * p_tie)
    p_win += p_overtime * p_tie
    print('p_win', p_win)

    # win the next two
    p_series = p_win**2

    # split the next two, win the third
    p_series += 2 * p_win * (1 - p_win) * p_win

    print('p_series', p_series)
#--- Chapter2 Ex4
# Compare total birth weight of first babies against the others.
wgt_live = live.totalwgt_lb.dropna()
wgt_first = firsts.totalwgt_lb.dropna()
wgt_other = others.totalwgt_lb.dropna()

# Difference in means as a percentage of the overall mean weight.
mean_diff = (
    100 * (wgt_first.mean() - wgt_other.mean()) / wgt_live.mean()
)
wgt_cohend = thinkstats2.CohenEffectSize(wgt_first, wgt_other)
plen_cohend = thinkstats2.CohenEffectSize(firsts.prglngth, others.prglngth)

print('Difference in relative mean:', mean_diff)
print('Cohen\'s d for total weight in lbs:', wgt_cohend)
print('Cohen\'s d for pregnancy length in weeks:', plen_cohend)

#--- Chapter3 Ex1
# Actual versus biased distribution of the number of kids per household.
actual_pmf = thinkstats2.Pmf(resp.numkdhh, label='actual')
biased_pmf = BiasPmf(actual_pmf, label='biased')

thinkplot.PrePlot(2)
actual_hist = thinkplot.Pmf(actual_pmf)
biased_hist = thinkplot.Pmf(biased_pmf)
thinkplot.Show(xlabel='#kids in household', ylabel='PMF')

print('Actual Mean:', actual_pmf.Mean())
print('Biased Mean:', biased_pmf.Mean())

#--- Chapter4 Ex2
# PMF and CDF of 1000 draws from np.random.random.
my_seq = np.random.random(1000)
my_pmf = thinkstats2.Pmf(my_seq)
my_cdf = thinkstats2.Cdf(my_seq)

thinkplot.Pmf(my_pmf, linewidth=0.1)
thinkplot.Show(xlabel='Random variable', ylabel='PMF')

thinkplot.Cdf(my_cdf)
thinkplot.Show(xlabel='Random variable', ylabel='CDF')

#--- Chapter5 Ex1