Пример #1
0
def Main():
    """Run one round of mutual updating between a redditor and an item.

    Builds a ``Redditor`` whose distribution is filled from
    ``thinkbayes.Beta(2, 1)`` (values rescaled by 100) and an ``Item`` over
    the values 0..100, prints both means divided by 100, applies an
    ``'up'`` update to each one using the other's pre-update mean,
    summarizes the item and plots both distributions.
    """
    # redditor with some trustworthiness (original note: mean_t = 0.67)
    voter = Redditor(name='redditor')
    for value, weight in thinkbayes.Beta(2, 1).MakePmf().Items():
        voter.Set(value * 100, weight)

    # new item with unknown quality (original note: mean_q = 0.5)
    link = Item(range(0, 101), name='item')

    # both means are taken before either update, so the two updates
    # below are effectively simultaneous
    mean_t = voter.Mean() / 100.0
    mean_q = link.Mean() / 100.0
    print(mean_t)
    print(mean_q)

    voter.Update(('up', mean_q))
    link.Update(('up', mean_t))

    Summarize(link)

    # show the posterior distributions on one figure
    for dist in (voter, link):
        myplot.Pmf(dist)
    myplot.Show()
Пример #2
0
def ExpoErlangDemo():
    """Plot Expo and Erlang suites updated with the same observation.

    A series is generated with ``MakeSeries`` and split at ``num``; the
    four values returned by ``Split`` are printed.  Both an ``Expo`` and
    an ``Erlang`` suite over the same grid of 101 rates in [0.01, 5.01]
    are updated with the first pair ``(n, s1)`` and drawn together.
    """
    num = 10
    rate_first, rate_second = 1, 2
    series = Series(MakeSeries(num, rate_first, num, rate_second))
    n, s1, m, s2 = series.Split(num)
    print('%s %s %s %s' % (n, s1, m, s2))

    lams = numpy.linspace(0.01, 5.01, 101)

    # note: both suites deliberately receive (n, s1); (m, s2) is only printed
    suites = []
    for factory, label in ((Expo, 'expo'), (Erlang, 'erlang')):
        suite = factory(lams)
        suite.name = label
        suite.Update((n, s1))
        suites.append(suite)

    for suite in suites:
        myplot.Pmf(suite)
    myplot.Show()
Пример #3
0
def main():
    """Print height statistics and plot Hist/Pmf/Cdf for men and women.

    Reads BRFSS respondent records from the ``res`` directory, takes the
    height lists stored under keys 1 and 2 of ``SummarizeHeight()``
    (used here as the male and female groups), prints trimmed mean,
    variance, standard deviation and coefficient of variation for each,
    then shows one figure per distribution.
    """
    def show_then_clear():
        # display the current figure, then reset it for the next plot
        myplot.Show()
        myplot.Clf()

    respondents = brfss.Respondents()
    respondents.ReadRecords(data_dir='res')
    heights = respondents.SummarizeHeight()

    man_d = heights[1]
    lady_d = heights[2]

    # men: mu, var, sigma and coefficient of variation (CV)
    man_mu, man_var = thinkstats.TrimmedMeanVar(man_d)
    man_sigma = math.sqrt(man_var)
    man_cv = man_sigma / man_mu
    print("man: mu = %.3f, var = %.3f, sigma = %.3f, cv = %.3f" % (man_mu, man_var, man_sigma, man_cv))

    # women: mu, var, sigma and coefficient of variation (CV)
    lady_mu, lady_var = thinkstats.TrimmedMeanVar(lady_d)
    lady_sigma = math.sqrt(lady_var)
    lady_cv = lady_sigma / lady_mu
    print("lady: mu = %.3f, var = %.3f, sigma = %.3f, cv = %.3f" % (lady_mu, lady_var, lady_sigma, lady_cv))

    # histograms, one figure each
    man_hist = Pmf.MakeHistFromList(man_d, name='man hist')
    myplot.Hist(man_hist)
    show_then_clear()

    lady_hist = Pmf.MakeHistFromList(lady_d, name='lady hist')
    myplot.Hist(lady_hist)
    show_then_clear()

    # PMFs derived from the histograms, one figure each
    man_pmf = Pmf.MakePmfFromHist(man_hist, name='man pmf')
    myplot.Pmf(man_pmf)
    show_then_clear()

    lady_pmf = Pmf.MakePmfFromHist(lady_hist, name='lady pmf')
    myplot.Pmf(lady_pmf)
    show_then_clear()

    # cumulative distributions of both groups on a single figure
    man_cdf = Cdf.MakeCdfFromPmf(man_pmf, name='man cdf')
    lady_cdf = Cdf.MakeCdfFromPmf(lady_pmf, name='lady cdf')
    myplot.Cdfs((man_cdf, lady_cdf), complement=False, transform=None)
    myplot.Show()
Пример #4
0
def main():
    """Estimate a parameter from several samples and save the posteriors.

    Starts from ``MakeUniformSuite(0.5, 1.5, 1000)`` as the prior, runs
    ``EstimateParameter`` once on a fixed six-value sample and once for
    each of three random exponential samples (n = 10, 20, 40, rate 1.2),
    then draws the four posteriors on a 2x2 grid and saves the figure
    under the root name ``posteriors``.
    """
    param = 1.2
    prior = MakeUniformSuite(0.5, 1.5, 1000)

    # the fixed sample (original note: "the sample in the book")
    book_sample = [2.675, 0.198, 1.152, 0.787, 2.717, 4.269]
    posteriors = [
        EstimateParameter(prior, book_sample, 'post%d' % len(book_sample))
    ]

    # randomly generated samples of increasing size
    for n in [10, 20, 40]:
        draws = [random.expovariate(param) for _ in range(n)]
        posteriors.append(EstimateParameter(prior, draws, 'post%d' % n))

    # one subplot per posterior; subplot indices are 1-based
    for slot, posterior in enumerate(posteriors, 1):
        pyplot.subplot(2, 2, slot)
        myplot.Pmf(posterior)
        pyplot.xlabel('lambda')
        pyplot.ylabel('Posterior probability')
        pyplot.legend()

    myplot.Save(root='posteriors')
Пример #5
0
def process(data):
    """Plot the Hist, Pmf and Cdf of ``data`` and overlay normal-CDF points.

    Prints the mean and standard deviation of ``data`` (from
    ``thinkstats.MeanVar``), then scatters the normal CDF with those
    parameters, evaluated at the values returned by ``normal_sample``,
    on top of the empirical CDF.
    """
    # histogram
    hist = Pmf.MakeHistFromList(data, name='hist')
    myplot.Hist(hist, color='blue')
    myplot.Show()

    # PMF built from the histogram
    myplot.Pmf(Pmf.MakePmfFromHist(hist, name='pmf'), color='yellow')
    myplot.Show()

    myplot.Clf()

    # CDF of the actual data
    myplot.Cdf(Cdf.MakeCdfFromList(data, name='loafs'))

    mu, var = thinkstats.MeanVar(data)
    sigma = math.sqrt(var)
    print("mu = %.3f, sigma = %.3f" % (mu, sigma))

    # normal model: x positions come from normal_sample (the original
    # author noted ``xs = data`` as an alternative), y is the normal CDF
    xs = normal_sample(len(data), mu, sigma)
    ys = [erf.NormalCdf(x, mu=mu, sigma=sigma) for x in xs]
    myplot.Scatter(xs, ys, color='red', label='sample')
    myplot.Show()
Пример #6
0
def main():
    """Show and save the biased PMF and CDF of relay running speeds.

    Builds a PMF of the speeds read through ``relay``, passes it to
    ``BiasPmf`` with the value 7.5 (presumably the observer's speed in
    mph -- confirm against ``BiasPmf``), shows the result, and saves a
    histogram (root ``observed_speeds``) and a CDF
    (root ``observed_speeds_cdf``) of it.
    """
    speeds = relay.GetSpeeds(relay.ReadResults())
    actual = Pmf.MakePmfFromList(speeds, 'actual speeds')

    # distribution as seen by the observer
    observed = BiasPmf(actual, 7.5, name='observed speeds')
    pmf_labels = dict(xlabel='speed (mph)', ylabel='probability')

    myplot.Pmf(observed)
    myplot.Show(title='soln. PMF of running speed', **pmf_labels)

    myplot.Clf()
    myplot.Hist(observed)
    myplot.Save(root='observed_speeds',
                title='PMF of running speed',
                **pmf_labels)

    cdf = Cdf.MakeCdfFromPmf(observed)

    myplot.Clf()
    myplot.Cdf(cdf)
    myplot.Save(root='observed_speeds_cdf',
                title='CDF of running speed',
                xlabel='speed (mph)',
                ylabel='cumulative probability')
Пример #7
0
def observe_data(l, name=None, show=False):
    """Print quartile statistics for a sample and optionally plot it.

    Args:
        l: a plain ``list`` of values, a ``Pmf.Pmf`` or a ``Cdf.Cdf``.
        name: label used in the printed line; for list input it is also
            the prefix of the generated Cdf/Pmf names.  When omitted, the
            name of the given Pmf/Cdf is used, or ``'data'`` for a list.
        show: when true, plot the Pmf (if one is available) and then the
            Cdf, each followed by ``myplot.Show()``.

    Raises:
        TypeError: if ``l`` is none of the supported types.
    """
    cdf = pmf = None
    if isinstance(l, list):
        # name=None used to crash here with ``None + ' cdf'``; fall back
        # to a generic label so the documented default is usable.
        if name is None:
            name = 'data'
        cdf = Cdf.MakeCdfFromList(l, name + ' cdf')
        pmf = Pmf.MakePmfFromList(l, name + ' pmf')
    elif isinstance(l, Pmf.Pmf):
        pmf = l
        cdf = Cdf.MakeCdfFromPmf(l)
        if name is None:
            name = pmf.name
    elif isinstance(l, Cdf.Cdf):
        # no Pmf is available in this case, so only the Cdf can be plotted
        cdf = l
        if name is None:
            name = cdf.name
    else:
        # TypeError is still an Exception, so existing handlers keep working
        raise TypeError('input parameter type is wrong')

    v_25, median, v_75 = cdf.Percentile(25), cdf.Percentile(50), cdf.Percentile(75)
    mean = cdf.Mean()
    # values in parentheses are distances: median - Q1, mean - median, Q3 - median
    print('%s: 1/4:%4.2f(%4.2f), 1/2:%4.2f(mean-median:%4.2f), mean:%4.2f, 3/4:%4.2f(%4.2f)' %
          (name, v_25, median - v_25, median, mean - median, mean, v_75, v_75 - median))

    if show:
        if pmf is not None:
            myplot.Pmf(pmf)
            myplot.Show()
        myplot.Cdf(cdf)
        myplot.Show()
Пример #8
0
def PlotPmf(results):
    """Build a PMF from ``GetSpeeds(results)`` and plot it with ``show=True``."""
    pmf = Pmf.MakePmfFromList(GetSpeeds(results), 'speeds')
    options = dict(title='PMF of running speed',
                   xlabel='speed (mph)',
                   ylabel='probability',
                   show=True)
    myplot.Pmf(pmf, **options)
Пример #9
0
def main():
    """Read the results, build a PMF of the speeds and show it."""
    speed_pmf = Pmf.MakePmfFromList(GetSpeeds(ReadResults()), 'speeds')
    myplot.Pmf(speed_pmf)

    labels = dict(title='PMF of running speed',
                  xlabel='speed (mph)',
                  ylabel='probability')
    myplot.Show(**labels)
Пример #10
0
def main():
    """Update a ``Train`` suite over 100..1000 with 321 and plot it.

    The mean of the suite after the update is printed before plotting.
    """
    suite = Train(xrange(100, 1001))
    suite.Update(321)
    print(suite.Mean())

    myplot.Pmf(suite)
    myplot.Show()
Пример #11
0
def main():
    """Show the PMF and then the CDF of 1000 random values in [0, 100)."""
    # ``samples`` rather than ``list`` so the builtin is not shadowed
    samples = [100 * random.random() for _ in range(1000)]
    pmf = Pmf.MakePmfFromList(samples, name='pfm')
    cdf = Cdf.MakeCdfFromList(samples, name='cdf')

    myplot.Pmf(pmf)
    myplot.Show()

    # start the second figure from a clean slate
    myplot.Clf()
    myplot.Cdf(cdf)
    myplot.Show()
Пример #12
0
def PlotSurvivalCurve(ts, lams, ss):
    """Draw ``lams`` as a dashed grey PMF with the curve (ts, ss) on top.

    ``lams.Normalize`` is called on the object passed in, with the
    largest of its probabilities as the argument, so the caller's
    ``lams`` is affected.  The figure is saved under the root ``seer1``.
    """
    # scale lams
    peak = max(lams.Probs())
    lams.Normalize(peak)

    dashed_grey = dict(linewidth=2, linestyle='dashed', color='0.7')
    myplot.Pmf(lams, line_options=dashed_grey)

    pyplot.plot(ts, ss, linewidth=2, color='blue', label='survival')

    save_options = dict(root='seer1',
                        title='',
                        xlabel='Survival time (years)',
                        ylabel='Probability')
    myplot.Save(**save_options)
Пример #13
0
def main():
    suite = Euro(range(0, 101))

    #myplot.Pmf(suite)
    #myplot.Show()

    for i in range(140):
        suite.Update('H')
    for i in range(110):
        suite.Update('T')
    myplot.Pmf(suite)
    myplot.Show()
Пример #14
0
def MakeFigures(exam, alice, bob):
    """Save two PNG figures: ``exam.prior`` and the pair (alice, bob).

    The first is saved under the root ``sat_prior``, the second (after
    clearing the figure) under ``sat_posterior``.
    """
    formats = ['png']
    axis_labels = dict(xlabel='p', ylabel='PMF')

    myplot.Pmf(exam.prior, label='prior')
    myplot.Save(root='sat_prior', formats=formats, **axis_labels)

    myplot.Clf()
    myplot.Pmfs([alice, bob])
    myplot.Save(root='sat_posterior', formats=formats, **axis_labels)
Пример #15
0
def Main():
    """Update a ``Liar(y=0.1)`` suite with each datum, summarize and plot."""
    suite = Liar(y=0.1)

    # each character of the string is one observation
    observations = 'H'
    for datum in observations:
        suite.Update(datum)

    Summarize(suite)

    myplot.Pmf(suite)
    myplot.Show()
Пример #16
0
def main():
    """Update a uniform suite on [0, 1] with (140, 110) and show it."""
    suite = MakeUniformSuite(0.0, 1.0, 1001)

    # evidence is passed to Update as a 2-tuple
    Update(suite, (140, 110))
    suite.name = 'posterior'

    # display the posterior distribution
    myplot.Pmf(suite)
    labels = dict(title='Biased coin',
                  xlabel='P(heads)',
                  ylabel='Posterior probability')
    myplot.Show(**labels)
Пример #17
0
def main(script):
    """Print Alice's raw score and show the exam's prior distribution.

    ``script`` is accepted but not used.
    """
    # exam object (original note: data from the 2010 SAT)
    exam = Exam()

    # look up the raw score for Alice's value of 780
    alice = 780
    alice_correct = exam.GetRawScore(alice)
    print('Alice raw score %s' % (alice_correct,))

    # distribution returned by GetPrior, shown immediately
    myplot.Pmf(exam.GetPrior(), show=True)
Пример #18
0
def ExpoDemo():
    """Plot two ``Expo`` suites, one per half of a generated series.

    A series is generated with ``MakeSeries`` and split at ``num``; the
    four values returned by ``Split`` are printed.  One suite is updated
    with ``(n, s1)`` and the other with ``(m, s2)``, both over the same
    grid of 101 rates in [0.01, 5.01].
    """
    num = 10
    rate_first, rate_second = 1, 2
    series = Series(MakeSeries(num, rate_first, num, rate_second))
    n, s1, m, s2 = series.Split(num)
    print('%s %s %s %s' % (n, s1, m, s2))

    lams = numpy.linspace(0.01, 5.01, 101)

    suites = []
    for observation in ((n, s1), (m, s2)):
        suite = Expo(lams)
        suite.Update(observation)
        suites.append(suite)

    for suite in suites:
        myplot.Pmf(suite)
    myplot.Show()
Пример #19
0
def PlotPosteriorSigma(posterior):
    """Print the 90 credible interval of ``posterior`` and plot both.

    The figure is cleared first, the interval is drawn with
    ``PlotCredibleInterval`` and the PMF is added on top without
    clearing (``clf=False``).
    """
    ci = CredibleInterval(posterior, 90)
    print('CI: %s' % (ci,))

    pyplot.clf()
    PlotCredibleInterval(posterior, ci)

    pmf_options = dict(root='sigma',
                       clf=False,
                       title='Posterior PMF',
                       xlabel='sigma',
                       ylabel='probability',
                       show=True)
    myplot.Pmf(posterior, **pmf_options)
Пример #20
0
def main():
    """Update a uniform suite with six observations, plot and report.

    After showing the posterior, prints the reciprocal of the evidence
    mean (labelled the naive estimate) and the mean of the posterior.
    """
    suite = MakeUniformSuite(0.001, 1.5, 1000)
    evidence = [1.5, 2, 3, 4, 5, 12]

    Update(suite, evidence)
    suite.name = 'posterior'

    # display the posterior distribution
    myplot.Pmf(suite)
    labels = dict(title='Decay parameter',
                  xlabel='Parameter (inverse cm)',
                  ylabel='Posterior probability')
    myplot.Show(**labels)

    naive = 1.0 / thinkstats.Mean(evidence)
    print('Naive parameter estimate: %s' % (naive,))
    print('Mean of the posterior distribution: %s' % (suite.Mean(),))
Пример #21
0
def main():
    """Run exercises 3.1 (class-size bias) and 3.2 (biased relay speeds).

    Exercise 3.1 prints the mean of the actual class-size PMF, of the
    same PMF with each probability multiplied by its class size and
    renormalized, and of that biased PMF passed through ``UnbiasPmf``;
    it then plots the first two against each other.  Exercise 3.2 shows
    the relay-speed PMF after ``BiasPmf`` with the value 7.5.
    """
    # Exercise 3.1
    d = {
        7: 8,
        12: 8,
        17: 14,
        22: 4,
        27: 6,
        32: 12,
        37: 8,
        42: 3,
        47: 2,
    }

    dean_pmf = Pmf.MakePmfFromDict(d, name='Actual')
    print(dean_pmf.Mean())

    # weight every class size by itself, then renormalize
    student_pmf = dean_pmf.Copy(name='Student Perspective')
    # list() takes a snapshot so the loop never walks a live view of the
    # mapping that Mult is updating
    for x, _ in list(student_pmf.Items()):
        student_pmf.Mult(x, x)
    student_pmf.Normalize()
    print(student_pmf.Mean())

    unbiased_pmf = UnbiasPmf(student_pmf, 'Student Unbiased')
    print(unbiased_pmf.Mean())

    by_value = itemgetter(0)

    # Unpack each zip once: zip(...)[0] only works on Python 2, where zip
    # returns a list, and the original also evaluated every zip twice.
    dean_xs, dean_ys = zip(*sorted(dean_pmf.Items(), key=by_value))
    student_xs, student_ys = zip(*sorted(student_pmf.Items(), key=by_value))
    plt.plot(dean_xs, dean_ys, 'g-', label='Actual')
    plt.plot(student_xs, student_ys, 'r-', label='Student Perspective')
    plt.legend(loc=4)
    plt.xlabel('Class Size')
    plt.ylabel('Probability')
    plt.show()

    # Exercise 3.2
    results = relay.ReadResults()
    speeds = relay.GetSpeeds(results)
    unbiased_speeds_pmf = Pmf.MakePmfFromList(speeds, 'speeds')
    biased_speeds_pmf = BiasPmf(unbiased_speeds_pmf, 7.5,
                                '7.5 mph biased speeds')

    myplot.Pmf(biased_speeds_pmf)
    myplot.Show(title='7.5mph biased speeds',
                xlabel='speeds (mph)',
                ylabel='probability')
Пример #22
0
def main():
    """Show four views of a 1000-element random sample.

    In order: the CDF, a scatter of ``cdf.Render()``, the histogram view
    and the PMF view of the same ``Pmf`` object.
    """
    sample_pmf = Pmf.MakePmfFromList(generate_random_sample(1000))
    sample_cdf = Cdf.MakeCdfFromPmf(sample_pmf)

    # NOTE(review): lower-case ``show``/``scatter`` are used here while
    # other snippets call ``myplot.Show`` -- kept exactly as written.
    myplot.Cdf(sample_cdf)
    myplot.show()

    myplot.scatter(*sample_cdf.Render())
    myplot.show()

    myplot.Hist(sample_pmf)
    myplot.show()

    myplot.Pmf(sample_pmf)
    myplot.show()
Пример #23
0
def main(script, *args):
    """Show the CDF of ``UniformOdds()`` and return.

    NOTE: the unconditional ``return`` means the Beta-distribution
    section after it never runs; it is kept as the original author left
    it.  ``script`` and ``args`` are not used.
    """
    myplot.Cdf(Cdf.MakeCdfFromPmf(UniformOdds()), show=True)
    return

    # ---- unreachable from here on ----
    beta = Beta(1, 0)
    plot_options = dict(show=True,
                        xlabel='Probability of sunrise: p',
                        ylabel='Probability density',
                        title='Beta distribution')
    myplot.Pmf(beta.Pmf(), **plot_options)

    cdf = beta.Cdf()
    for rank in (5, 95):
        print(cdf.Percentile(rank))

    print(cdf.Prob(0.5))
Пример #24
0
    def PlotPosteriorPmf(self, root=None, clf=False):
        """Plot ``self.Pmf()`` with the 5th-95th percentile range shaded.

        root: passed through to ``myplot.Pmf``; a truthy root also forces
              the figure to be cleared first
        clf: when true, clear the current figure before plotting
        """
        # a root implies a fresh figure, exactly as an explicit clf does
        if root or clf:
            pyplot.clf()

        posterior = self.Pmf()

        percentiles = self.Cdf()
        low = percentiles.Percentile(5)
        high = percentiles.Percentile(95)

        # shade only the values inside [low, high]
        shaded_xs = [x for x in posterior.Values() if low <= x <= high]
        shaded_ys = [posterior.Prob(x) for x in shaded_xs]
        pyplot.fill_between(shaded_xs, shaded_ys, y2=0.0001,
                            color='blue', alpha=0.2)

        # the figure was already cleared above if needed, hence clf=False
        pmf_options = dict(root=root,
                           clf=False,
                           xlabel='# of taxa',
                           ylabel='prob',
                           legend=False)
        myplot.Pmf(posterior, **pmf_options)
Пример #25
0
def main():
    """Update a uniform prior over 1..200 with 60 and save the posterior.

    Prints ``CredibleInterval(posterior, 90)`` and saves the plot under
    the root name ``locomotive``.
    """
    upper_bound = 200
    prior = MakeUniformSuite(1, upper_bound, upper_bound)
    prior.name = 'prior'

    # update a copy so the prior object itself is not passed to Update
    posterior = prior.Copy()
    Update(posterior, 60)
    posterior.name = 'posterior'

    print(CredibleInterval(posterior, 90))

    # plot the posterior distribution
    pyplot.subplots_adjust(wspace=0.4, left=0.15)
    myplot.Pmf(posterior, linewidth=2)

    save_options = dict(root='locomotive',
                        title='Locomotive problem',
                        xlabel='Number of trains',
                        ylabel='Posterior probability')
    myplot.Save(**save_options)
Пример #26
0
def main():
    """Run exercises 3.9 (resampled birth weights) and 3.10 (random values).

    Exercise 3.9 converts live-birth weights (records with outcome 1) to
    ounces, drops non-integer entries and totals above 200, draws 1000
    values via ``sample`` from their CDF and shows the CDF of the result.
    Exercise 3.10 shows the PMF and CDF of 1000 ``random.random()`` values.
    """
    # Exercise 3.9
    table = survey.Pregnancies()
    table.ReadRecords()
    live_pairs = [(rec.birthwgt_lb, rec.birthwgt_oz)
                  for rec in table.records if rec.outcome == 1]

    # exact type comparison kept on purpose: only plain ints are accepted
    weights_oz = [
        lb * 16 + oz for lb, oz in live_pairs
        if type(lb) == int and type(oz) == int and lb * 16 + oz <= 200
    ]
    weights_cdf = Cdf.MakeCdfFromList(weights_oz, name="live birth weights")
    resampled = sample(weights_cdf, 1000)
    myplot.Cdf(Cdf.MakeCdfFromList(resampled))
    # NOTE(review): lower-case ``show`` here, ``Show`` at the end -- kept as written
    myplot.show(title="CDF of live births resampled")

    # Exercise 3.10
    uniform_draws = [random.random() for _ in range(1000)]
    myplot.Pmf(Pmf.MakePmfFromList(uniform_draws))
    myplot.show(title="random pmf")
    myplot.Cdf(Cdf.MakeCdfFromList(uniform_draws))
    myplot.Show(title="random cdf")
Пример #27
0
def main():
    """Plot the PMF of the word counts on log-log axes.

    Command-line options:
        --infile / -i:  file to scan (defaults to standard input)
        --outfile / -o: opened for writing when given (defaults to
                        standard output); nothing is written to it here

    The values of the dictionary returned by ``scan_file`` are turned
    into a PMF and shown.  Files opened here are closed again once the
    input has been scanned.
    """
    p = optparse.OptionParser()
    p.add_option('--infile', '-i')
    p.add_option('--outfile', '-o')
    options, _ = p.parse_args()

    ifile = sys.stdin
    ofile = sys.stdout
    try:
        if options.infile is not None:
            ifile = open(options.infile, 'r')

        # still opened as before (this creates/truncates the file) so
        # existing invocations behave the same
        if options.outfile is not None:
            ofile = open(options.outfile, 'w')

        word_count_dict = scan_file(ifile)
    finally:
        # close only what was opened here; never the standard streams
        if ifile is not sys.stdin:
            ifile.close()
        if ofile is not sys.stdout:
            ofile.close()

    # list() gives the Cdf and the Pmf a real sequence each
    counts = list(word_count_dict.values())
    cdf = Cdf.MakeCdfFromList(counts)
    pmf = Pmf.MakePmfFromList(counts)
    #myplot.Cdf(cdf, transform='pareto')
    #myplot.Show(title="KJV Biblical word frequency", complement=True, xscale='log', yscale='log')
    myplot.Pmf(pmf)
    myplot.Show(title='KJV Biblical word frequency',
                xscale='log',
                yscale='log')
Пример #28
0
import Cdf
import populations
import math
import myplot
import numpy
import Pmf

# Script: plot the Pmf and Cdf of bucketed population figures, then compare
# the sorted data (raw and log10) against a sorted standard-normal sample.

pops = populations.ReadData()

# Round every population down to a multiple of 50.  A list comprehension
# replaces map(lambda(x): ...): the tuple-parameter lambda is a syntax
# error on Python 3, and a real list is needed because bucketed_pops is
# traversed several times below.
bucketed_pops = [50 * math.floor(x / 50.0) for x in pops]

pmf = Pmf.MakePmfFromList(bucketed_pops)
cdf = Cdf.MakeCdfFromPmf(pmf)

myplot.Pmf(pmf)
myplot.Show(title="Pmf of populations", xscale='log')

myplot.Cdf(cdf)
myplot.Show(title="Cdf of populations", inverse=True, xscale='log')

# sorted normal sample against sorted data
xs = sorted(numpy.random.normal(0, 1, len(pops)))
ys = sorted(bucketed_pops)

myplot.Plot(xs, ys)
myplot.Show(title="Normal plot for populations")

# same comparison on a log10 scale; +1 keeps log10 defined for a 0 bucket
ys2 = sorted(math.log10(y + 1) for y in bucketed_pops)

myplot.Plot(xs, ys2)
myplot.Show(title="LogNormal plot for populations")

# it looks more like a lognormal, but with a hard lower bound (as expected)
Пример #29
0
def MakePmf(table):
    """Plot and show the PMF of ``prglength`` over ``table.records``."""
    pmf = Pmf.MakePmfFromList(
        [rec.prglength for rec in table.records],
        name='pregnancy length',
    )
    myplot.Pmf(pmf, show=True, xlabel='weeks', ylabel='probability')
Пример #30
0
# Example 3-10

import random, Pmf, Cdf, myplot

# Draw ``size`` values from random.random() and show their PMF and CDF.
size = 10000
lst = [random.random() for _ in range(size)]
lst_pmf = Pmf.MakePmfFromList(lst)
lst_cdf = Cdf.MakeCdfFromList(lst)

pmf_title = 'PMF of {0} randoms'.format(size)
cdf_title = 'CDF of {0} randoms'.format(size)

# each figure starts from a cleared canvas
myplot.Clf()
myplot.Pmf(lst_pmf)
myplot.Show(title=pmf_title)

myplot.Clf()
myplot.Cdf(lst_cdf)
myplot.Show(title=cdf_title)

# author's conclusion: yes, the distribution is uniform