def QUpdatePlot(pmf, data, thresh, label=None):
    """Update `pmf` with `data`, then plot `pmf.generateQ(thresh)`.

    pmf: object providing Update, UpdateSet and generateQ
    data: a single number (handed to pmf.Update) or any other value
          (handed to pmf.UpdateSet)
    thresh: argument forwarded to pmf.generateQ
    label: label forwarded to thinkplot.Pdf
    """
    # isinstance, unlike an exact type() match, also sends int/float
    # subclasses (e.g. numpy.float64, bool) down the single-value path
    # instead of handing a scalar to UpdateSet.
    if isinstance(data, (int, float)):
        pmf.Update(data)
    else:
        pmf.UpdateSet(data)

    # The plot is the same whichever update path was taken.
    thinkplot.Pdf(pmf.generateQ(thresh), label=label)
def main():
    """Update a Gps joint distribution and plot/print both marginals."""
    grid = numpy.linspace(-100, 100, 101)
    joint = Gps(product(grid, grid))

    # Two individual observations first, then one batch update.
    for observation in [(51, -15), (48, 90)]:
        joint.Update(observation)

    pairs = [
        (11.903060613102866, 19.79168669735705),
        (77.10743601503178, 39.87062906535289),
        (80.16596823095534, -12.797927542984425),
        (67.38157493119053, 83.52841028148538),
        (89.43965206875271, 20.52141889230797),
        (58.794021026248245, 30.23054016065644),
        (2.5844401241265302, 51.012041625783766),
        (45.58108994142448, 3.5718287379754585),
    ]
    joint.UpdateSet(pairs)

    thinkplot.PrePlot(2)
    marginal_x = joint.Marginal(0)
    marginal_y = joint.Marginal(1)
    thinkplot.Pdf(marginal_x, label='posterior x')
    thinkplot.Pdf(marginal_y, label='posterior y')
    thinkplot.Show()

    for marginal in (marginal_x, marginal_y):
        print(marginal.Mean(), marginal.Std())
def main():
    """Plot an Electorate prior and its posteriors after two updates.

    Saves 'electorate1' after the first update (keeping the figure open)
    and 'electorate2' after the second, printing summary numbers each time.
    """
    suite = Electorate(range(0, 101))

    def report():
        # Summary numbers printed after each update.
        print(suite.Mean())
        print(suite.Std())
        print(suite.ProbLess(50))

    thinkplot.PrePlot(3)
    thinkplot.Pdf(suite, label='prior')

    first_poll = 1.1, 3.7, 53
    suite.Update(first_poll)
    thinkplot.Pdf(suite, label='posterior1')
    # clf=False keeps the figure so the next curve lands on the same axes.
    thinkplot.Save(root='electorate1',
                   xlabel='percentage of electorate',
                   ylabel='PMF',
                   formats=['png'],
                   clf=False)
    report()

    second_poll = -2.3, 4.1, 49
    suite.Update(second_poll)
    thinkplot.Pdf(suite, label='posterior2')
    thinkplot.Save(root='electorate2',
                   xlabel='percentage of electorate',
                   ylabel='PMF',
                   formats=['png'])
    report()
def main():
    """Build a Soccer prior from the mean interarrival time, then update
    it with two observations (11 and 12), plotting and printing each step.
    """
    suite = Soccer(numpy.linspace(0, 12, 201))

    # the mean number of goals per game was 2.67
    rate_per_team = 2.67 / 2
    interarrival = 90 / rate_per_team

    # start with a prior based on the mean interarrival time
    suite.Update(interarrival)
    thinkplot.Pdf(suite, label='prior')
    print('prior mean', suite.Mean())

    suite.Update(11)
    thinkplot.Pdf(suite, label='posterior 1')
    print('after one goal', suite.Mean())

    suite.Update(12)
    thinkplot.Pdf(suite, label='posterior 2')
    print('after two goals', suite.Mean())

    thinkplot.Show()

    # plot the predictive distribution
    remaining = 90 - 23
    suite.PredRemaining(remaining, 2)
def main():
    """Plot and summarise a User and a Link suite before and after an
    'up' update, each updated with the other's prior mean.

    Returns 0.
    """
    def show(suite):
        # Plot the suite, display it, then print mean and 90% interval.
        thinkplot.Pdf(suite)
        thinkplot.Show()
        print(suite.Mean(), suite.CredibleInterval(90))

    user = User(label='user')
    prior = thinkbayes2.Beta(2, 1)
    for value, prob in prior.MakePmf().Items():
        user.Set(value * 100, prob)
    show(user)
    mean_r = user.Mean() / 100.0

    link = Link(range(0, 101), label='link')
    show(link)
    mean_q = link.Mean() / 100.0

    # Both means were captured before either suite is updated.
    user.Update(('up', mean_q))
    show(user)

    link.Update(('up', mean_r))
    show(link)

    return 0
def main():
    """Plot an Electorate suite before and after a single update."""
    suite = Electorate(numpy.linspace(0, 100, 101))
    thinkplot.Pdf(suite, label='prior')

    observation = 1.1, 3.7, 53
    suite.Update(observation)
    thinkplot.Pdf(suite, label='posterior')

    thinkplot.Show()
def MakePdfs(greq, less):
    """Plot estimated PDFs of `totalwgt_lb` for two frames side by side.

    greq: frame plotted in the first subplot ('greater/equal to 30')
    less: frame plotted in the second subplot ('less than 30')
    """
    # Both estimates are built before any plotting starts.
    panels = [
        (thinkstats2.EstimatedPdf(greq.totalwgt_lb.dropna()),
         'greater/equal to 30'),
        (thinkstats2.EstimatedPdf(less.totalwgt_lb.dropna()),
         'less than 30'),
    ]

    thinkplot.PrePlot(rows=1, cols=2)
    for position, (estimate, caption) in enumerate(panels, start=1):
        thinkplot.SubPlot(position)
        thinkplot.Pdf(estimate, label=caption)
        thinkplot.Config(xlabel='Birth weight (lbs)', ylabel='PDF')
    thinkplot.Show()
def main():
    """Plot a Soccer suite before and after one update with the value 11."""
    suite = Soccer(numpy.linspace(0, 12, 201))

    thinkplot.Pdf(suite, label='prior')
    print('prior mean', suite.Mean())

    suite.Update(11)
    thinkplot.Pdf(suite, label='posterior 1')
    print('after one goal', suite.Mean())

    thinkplot.Show()
def main():
    """Update a Hyrax and a Hyrax2 suite with the same data and plot both.

    The Hyrax posterior is labelled 'binomial' and the Hyrax2 posterior
    'hypergeom'.
    """
    hypos = range(1, 1000)
    suite = Hyrax(hypos)
    suite2 = Hyrax2(hypos)

    data = 10, 10, 2
    suite.Update(data)
    suite2.Update(data)

    thinkplot.Pdf(suite, label='binomial')
    # Plot the second suite here; the original drew `suite` twice, so the
    # Hyrax2 posterior never appeared.
    thinkplot.Pdf(suite2, label='hypergeom')
    thinkplot.Show()
def main():
    """Build estimated PDFs from two short rate lists and plot them."""
    maleRates = [0.52, 0.38, 0.39, 1.01, 2.63]
    femaleRates = [50, 20.5]

    pdfMale = thinkbayes2.EstimatedPdf(maleRates)
    pdfFemale = thinkbayes2.EstimatedPdf(femaleRates)

    xs = numpy.linspace(0, 100, 1001)

    # The PMFs are built but not used below; the plots draw the PDFs.
    pmfMale = pdfMale.MakePmf(steps=xs)
    pmfFemale = pdfFemale.MakePmf(steps=xs)

    for estimate, caption in ((pdfMale, 'Male Prior'),
                              (pdfFemale, 'Female Prior')):
        thinkplot.Pdf(estimate, label=caption)
    thinkplot.show()
def main():
    """Plot an Electorate suite before and after one update, then print
    the posterior's Std, Mean and ProbLess(50).
    """
    hypos = numpy.linspace(0, 100, 101)
    suite = Electorate(hypos)
    thinkplot.Pdf(suite, label='prior')

    data = 1.1, 3.7, 53
    suite.Update(data)
    thinkplot.Pdf(suite, label='posterior')
    thinkplot.Show()

    print(suite.Std())
    # Was `suit.Mean()`, an undefined name that raised NameError.
    print(suite.Mean())
    print(suite.ProbLess(50))
def main():
    """Print the mean of a Version3 suite and plot it without a legend."""
    version = Version3()
    print(version.Mean())

    thinkplot.Pdf(version)
    thinkplot.Show(legend=False)
def main():
    """Update a Euro suite with a single 'H' and plot the result."""
    euro = Euro(range(0, 101))
    euro.Update('H')

    thinkplot.Pdf(euro)
    thinkplot.Show(xlabel='x',
                   ylabel='Probability',
                   legend=False)
def main():
    """Turn two rate lists into normalized PMFs via MakePmfTest and plot them."""
    maleRates = [0.52, 0.38, 0.39, 1.01, 2.63, 30]
    femaleRates = [50, 20.5, 40, 30, 45]

    pdfMale = thinkbayes2.EstimatedPdf(maleRates)
    pdfFemale = thinkbayes2.EstimatedPdf(femaleRates)

    xs = numpy.linspace(0, 100, 1001)

    pmfMale = MakePmfTest(pdfMale, steps=xs)
    pmfFemale = MakePmfTest(pdfFemale, steps=xs)

    for pmf in (pmfMale, pmfFemale):
        pmf.Normalize()

    thinkplot.Pdf(pmfMale, label='Male Prior')
    thinkplot.Pdf(pmfFemale, label='Female Prior')
    thinkplot.show()
def main():
    """Update a Hyrax suite with one data tuple and plot the posterior."""
    hyrax = Hyrax(range(1, 1000))

    observation = 10, 10, 2
    hyrax.Update(observation)

    thinkplot.Pdf(hyrax, label='posterior')
    thinkplot.Show()
def main():
    """Apply two successive updates to an Electorate suite, printing suite
    info and plotting the distribution after each one.
    """
    suite = Electorate(numpy.linspace(0, 100, 101))
    thinkplot.Pdf(suite, label='prior')

    # Tuple layout per the original note: mean prior error, std, measurement.
    polls = [
        ((1.1, 3.7, 53), 'before poll'),
        ((-2.3, 4.1, 49), 'after poll'),
    ]
    for poll, caption in polls:
        suite.Update(poll)
        PrintSuiteInfo(suite)
        thinkplot.Pdf(suite, label=caption)

    thinkplot.Show()
def PriorPost(pmf, data):
    """Plot `pmf` before and after `pmf.UpdateSet(data)` and save the figure.

    pmf: distribution to plot; it is updated in place
    data: dataset handed to pmf.UpdateSet

    The figure is written under the root name 'PriorAndPosterior' as PNG.
    """
    thinkplot.Pdf(pmf, label='Prior')
    pmf.UpdateSet(data)
    thinkplot.Pdf(pmf, label='Posterior')

    thinkplot.Save(root='PriorAndPosterior',
                   xlabel='Lambda',
                   ylabel='Probability',
                   legend=True,
                   formats=['png'],
                   title='Arrival Rate Distribution')
def generate_pmf(fb, hk):
    """Plot degree PMFs of two graphs on log-log axes, side by side, and
    save the figure as 'PMFGraphs_Original.png'.

    fb: graph plotted in the first panel (label 'Facebook')
    hk: graph plotted in the second panel (label 'HK graph')
    """
    pmf_fb = Pmf(degrees(fb))
    pmf_hk = Pmf(degrees(hk))

    # Axis settings shared by both panels.
    loglog = dict(xscale='log', yscale='log',
                  xlabel='degree', ylabel='PMF')
    # Styling of the dashed gray guide line drawn in each panel.
    guide = dict(color='gray', linestyle='dashed')

    thinkplot.preplot(cols=2)
    thinkplot.plot([30, 2000], [5e-2, 2e-4], **guide)
    thinkplot.Pdf(pmf_fb, style='.', label='Facebook')
    thinkplot.config(**loglog)

    thinkplot.subplot(2)
    thinkplot.plot([55, 500], [5e-2, 2e-4], **guide)
    thinkplot.Pdf(pmf_hk, style='.', label='HK graph')
    thinkplot.config(**loglog)

    plt.savefig('PMFGraphs_Original.png')
def generate_pmf(fb, hk):
    """Plot the degree PMF of `hk` on log-log axes with a dashed guide
    line and save the figure as 'PMFGraphs_Modified.png'.

    fb: graph whose degree PMF is built but not plotted
    hk: graph whose degree PMF is plotted (label 'RPA')
    """
    # Built as in the original, but never used by the plot below.
    pmf_fb = Pmf(degrees(fb))
    pmf_hk = Pmf(degrees(hk))

    guide_xs, guide_ys = [6, 150], [5e-1, 2e-4]
    thinkplot.plot(guide_xs, guide_ys, color='gray', linestyle='dashed')

    thinkplot.Pdf(pmf_hk, style='.', label='RPA')
    thinkplot.config(xscale='log',
                     yscale='log',
                     xlabel='degree',
                     ylabel='PMF')

    plt.savefig('PMFGraphs_Modified.png')
def MakePdfExample():
    """Plot a Gaussian density and a KDE estimate from 100 random draws,
    saving the figure under the root name 'pdf_example'.
    """
    # mean and var of women's heights in cm, from the BRFSS
    mean = 163
    var = 52.8
    std = math.sqrt(var)

    # build the analytic PDF and print its density one std above the mean
    gaussian = thinkstats2.GaussianPdf(mean, std)
    print(gaussian.Density(mean + std))

    thinkplot.PrePlot(2)
    thinkplot.Pdf(gaussian, label='Gaussian')

    # draw a sample, estimate its PDF, and plot it
    draws = [random.gauss(mean, std) for _ in range(100)]
    kde = thinkstats2.EstimatedPdf(draws)
    thinkplot.Pdf(kde, label='sample KDE')

    thinkplot.Save(root='pdf_example',
                   xlabel='Height (cm)',
                   ylabel='Density')
def main():
    """Update a Euro suite with 140 'H' then 110 'T' observations, one at
    a time, then plot it and print three summary values.
    """
    euro = Euro(range(0, 101))

    observations = 'H' * 140 + 'T' * 110
    for outcome in observations:
        euro.Update(outcome)

    thinkplot.Pdf(euro)
    thinkplot.Show(xlabel='x',
                   ylabel='Probability',
                   legend=False)

    mean = euro.Mean()
    most_likely = euro.MaximumLikelihood()
    interval = euro.CredibleInterval(90)
    print(mean, most_likely, interval)
def ComputeSkewnesses():
    """Plot KDEs of birth weight and adult weight with mean/median markers.

    Each figure gets a thin vertical line and a text caption at the mean
    and at the median returned by Summarize, then is saved to disk
    ('density_totalwgt_kde' and 'density_wtkg2_kde').
    """
    def mark(x, height, text_x, caption, align):
        # Thin gray vertical line at x, plus a caption near its base.
        thinkplot.Plot([x, x], [0, height], color='0.6', linewidth=1)
        thinkplot.Text(text_x, 0.1 * height, caption,
                       horizontalalignment=align)

    # --- birth weight ---
    live, _firsts, _others = first.MakeFrames()
    weights = live.totalwgt_lb.dropna()
    print('Birth weight')
    mean, median = Summarize(weights)

    height = 0.35
    mark(mean, height, mean - 0.15, 'mean', 'right')
    mark(median, height, median + 0.1, 'median', 'left')

    thinkplot.Pdf(thinkstats2.EstimatedPdf(weights), label='birth weight')
    thinkplot.Save(root='density_totalwgt_kde',
                   xlabel='lbs',
                   ylabel='PDF')

    # --- adult weight ---
    respondents = brfss.ReadBrfss(nrows=None)
    weights = respondents.wtkg2.dropna()
    print('Adult weight')
    mean, median = Summarize(weights)

    height = 0.02499
    mark(mean, height, mean + 1, 'mean', 'left')
    mark(median, height, median - 1.5, 'median', 'right')

    thinkplot.Pdf(thinkstats2.EstimatedPdf(weights), label='adult weight')
    thinkplot.Save(root='density_wtkg2_kde',
                   xlabel='kg',
                   ylabel='PDF',
                   xlim=[0, 200])
def main():
    """Update a Lincoln suite over an (n, p1, p2) grid and report the
    marginal distribution of each component.
    """
    data = 20, 15, 3
    probs = numpy.linspace(0, 1, 31)

    # Every (n, p1, p2) combination, n outermost and p2 innermost.
    hypos = [(n, p1, p2)
             for n in range(32, 350)
             for p1 in probs
             for p2 in probs]

    suite = Lincoln(hypos)
    suite.Update(data)

    n_marginal = suite.Marginal(0)
    thinkplot.Pmf(n_marginal, label='n')
    thinkplot.Save(root='lincoln1',
                   xlabel='number of bugs',
                   ylabel='PMF',
                   formats=['pdf', 'png'])

    print('post mean n', n_marginal.Mean())
    print('MAP n', n_marginal.MaximumLikelihood())

    p1_marginal = suite.Marginal(1, label='p1')
    p2_marginal = suite.Marginal(2, label='p2')

    for marginal in (p1_marginal, p2_marginal):
        thinkplot.Pdf(marginal)
    thinkplot.Show()

    print('post mean p1', p1_marginal.Mean())
    print('MAP p1', p1_marginal.MaximumLikelihood())

    print('post mean p2', p2_marginal.Mean())
    print('MAP p2', p2_marginal.MaximumLikelihood())

    print('p1 > p2', p1_marginal > p2_marginal)
    print('p1 < p2', p1_marginal < p2_marginal)
def MakePdfExample(n=500):
    """Plot a normal density next to a KDE estimated from a random sample.

    n: number of random draws used for the KDE
    """
    # mean and var of women's heights in cm, from the BRFSS
    mean = 163
    var = 52.8
    std = math.sqrt(var)

    # build the analytic PDF and print its density one std above the mean
    normal = thinkstats2.NormalPdf(mean, std)
    print(normal.Density(mean + std))

    thinkplot.PrePlot(2)
    thinkplot.Pdf(normal, label='normal')

    # draw a sample, estimate its PDF, and plot it
    draws = [random.gauss(mean, std) for _ in range(n)]
    kde = thinkstats2.EstimatedPdf(draws)
    thinkplot.Pdf(kde, label='sample KDE')

    thinkplot.Save(root='pdf_example',
                   xlabel='Height (cm)',
                   ylabel='Density')
def MakePmfPlot(alpha=10):
    """Plot the location Pmf for several values of beta.

    alpha: value forwarded to MakeLocationPmf for every curve

    The figure is saved as 'paintball1' in the formats listed in FORMATS.
    """
    locations = range(0, 31)
    betas = [10, 20, 40]
    thinkplot.PrePlot(num=len(betas))

    for beta in betas:
        location_pmf = MakeLocationPmf(alpha, beta, locations)
        location_pmf.name = 'beta = %d' % beta
        thinkplot.Pdf(location_pmf)

    thinkplot.Save('paintball1',
                   xlabel='Distance',
                   ylabel='Prob',
                   formats=FORMATS)
def main():
    """Update a Billiards suite with (5, 3), save its plot, and print the
    odds from ProbWinMatch and from a maximum-likelihood estimate.
    """
    bill = Billiards(numpy.linspace(0, 1, 101))
    bill.Update((5, 3))

    thinkplot.Pdf(bill)
    thinkplot.Save(root='billiards1',
                   xlabel='probability of win',
                   ylabel='PDF',
                   formats=['png'])

    bayes_result = ProbWinMatch(bill)
    print(thinkbayes.Odds(1 - bayes_result))

    # Point estimate: 5 out of 8, then the complement cubed.
    mle = 5 / 8
    freq_result = (1 - mle) ** 3
    print(thinkbayes.Odds(1 - freq_result))
def MakeConditionalPlot(suite):
    """Plot the conditional distribution of alpha for several betas.

    suite: posterior joint distribution of location

    One curve is drawn with thinkplot.Pdf for each beta, using
    suite.Conditional(0, 1, beta); the figure is saved as 'paintball3'.
    """
    betas = [10, 20, 40]
    thinkplot.PrePlot(num=len(betas))

    for beta in betas:
        conditional = suite.Conditional(0, 1, beta)
        conditional.name = 'beta = %d' % beta
        thinkplot.Pdf(conditional)

    thinkplot.Save('paintball3',
                   xlabel='Distance',
                   ylabel='Prob',
                   formats=FORMATS)
def main():
    """Plot the CDF of an interpolated log-income sample, then the
    estimated PDF of the sample after raising 10 to it.
    """
    incomes = hinc.ReadData()
    log_sample = InterpolateSample(incomes, log_upper=6.0)

    thinkplot.Cdf(thinkstats2.Cdf(log_sample))
    thinkplot.Show(xlabel='household income',
                   ylabel='CDF')

    # Undo the base-10 log.
    sample = np.power(10, log_sample)
    mean, median = density.Summarize(sample)

    cdf = thinkstats2.Cdf(sample)
    print('cdf[mean]', cdf[mean])

    thinkplot.Pdf(thinkstats2.EstimatedPdf(sample))
    thinkplot.Show(xlabel='household income',
                   ylabel='PDF')
def main():
    """Plot the CDF of an interpolated log-income sample, print summary
    statistics of the sample after raising 10 to it, and plot its
    estimated PDF.
    """
    df = hinc.ReadData()
    log_sample = InterpolateSample(df, log_upper=6.0)

    log_cdf = thinkstats2.Cdf(log_sample)
    thinkplot.Cdf(log_cdf)
    thinkplot.Show(xlabel='household income',
                   ylabel='CDF')

    # Undo the base-10 log.
    sample = np.power(10, log_sample)
    mean = np.mean(sample)
    cdf = thinkstats2.Cdf(sample)

    # print() calls replace the Python 2 print statements, which are
    # syntax errors under Python 3; the printed text is the same.
    print("Median:", np.median(sample))
    print("Mean:", mean)
    print("Skewness:", thinkstats2.Skewness(sample))
    print("Pearson's Skewness:", thinkstats2.PearsonMedianSkewness(sample))
    print("Percent of people with incomes <= mean:", cdf[mean])

    pdf = thinkstats2.EstimatedPdf(sample)
    thinkplot.Pdf(pdf)
#
# - probability density function
#
# $PDF_{normal}(x) = \frac{1}{\sigma \sqrt{2\pi}}\exp[-\frac{1}{2}(\frac{x-\mu}{\sigma})^2]$

#%%
import thinkstats2
import math

# Normal PDF built from a mean and a variance; the bare expression shows
# the density one standard deviation above the mean as the cell output.
mean, var = 163, 52.8
std = math.sqrt(var)
pdf = thinkstats2.NormalPdf(mean, std)
pdf.Density(mean + std)

#%%
import thinkplot

thinkplot.Pdf(pdf, label='normal')
# Axis label spelling corrected from 'dencity'.
thinkplot.Show(xlabel='height (cm)', ylabel='density')

#%%
pmf = pdf.MakePmf()

#%% [markdown]
# ## 6.2 KDE
#
# - Kernel density estimation

#%%
import random

# Draw 500 values from the same normal and plot their estimated PDF.
sample = [random.gauss(mean, std) for _ in range(500)]
sample_pdf = thinkstats2.EstimatedPdf(sample)
thinkplot.Pdf(sample_pdf, label='sample KDE')