示例#1
0
def Main():
    """Run the Ratio() analysis.

    The prior/posterior comparison that previously followed an unconditional
    ``return`` in this function could never execute; it now lives in
    ``_PlotPriorsAndPosteriors`` so it can be invoked explicitly when wanted.
    """
    Ratio()


def _PlotPriorsAndPosteriors():
    """Plot two priors, update both with RunUpdate, and plot the posteriors."""
    # prior that gives equal weight to every integer from 0 to 100
    pmf1 = Pmf.Pmf()
    for x in range(0, 101):
        pmf1.Set(x, 1)
    pmf1.Normalize()

    pmf2 = TrianglePrior()

    # plot the priors
    myplot.Clf()
    myplot.Pmfs([pmf1, pmf2])
    myplot.Save(root='simple_coin_both_prior',
                title='Biased coin',
                xlabel='x',
                ylabel='Probability')

    RunUpdate(pmf1)
    RunUpdate(pmf2)

    # plot the posterior distributions
    myplot.Clf()
    myplot.Pmfs([pmf1, pmf2])
    myplot.Save(root='simple_coin_both_post',
                title='Biased coin',
                xlabel='x',
                ylabel='Probability')
示例#2
0
def Main():
    """Show CDFs of posterior photo means, split by ground-truth label.

    Reads the truth labels and the labeler data, runs the updates, and then
    shows two CDFs of ``photo.Mean()``: one for photos whose truth label is
    '1' (named 'yes') and one for every other photo that has a truth label
    (named 'no').  Photos without a truth label are ignored.

    The posterior-means plot that previously followed an unconditional
    ``return`` here was unreachable; it now lives in ``_ShowPosteriorMeans``.
    """
    truth = ReadTruth()
    truth_map = {pcode: label for pcode, label in truth}

    labels = ReadLabels()
    photo_map, labeler_map = MakeObjects(labels)

    RunUpdates(photo_map, labeler_map, labels)

    yes = []
    no = []
    # items() works on Python 2 and 3; iteritems() exists only on Python 2
    for pcode, photo in photo_map.items():
        if pcode not in truth_map:
            continue

        mean = photo.Mean()
        if truth_map[pcode] == '1':
            yes.append(mean)
        else:
            no.append(mean)

    myplot.Clf()
    cdf_yes = thinkbayes.MakeCdfFromList(yes, name='yes')
    cdf_no = thinkbayes.MakeCdfFromList(no, name='no')
    myplot.Cdfs([cdf_yes, cdf_no])
    myplot.Show()


def _ShowPosteriorMeans(photo_map, labeler_map):
    """Plot the posterior means of the photos and of the labelers, then show."""
    myplot.Clf()
    PlotPosteriorMeans(photo_map, 'photos')
    PlotPosteriorMeans(labeler_map, 'labelers')
    myplot.Show()
示例#3
0
def main():
    """Save a histogram and a CDF of the biased relay-race speed distribution.

    Reads the race results, builds the PMF of actual speeds, biases it with
    ``BiasPmf(pmf, 7.5)`` and writes two figures: 'observed_speeds'
    (histogram) and 'observed_speeds_cdf' (CDF).
    """
    results = relay.ReadResults()
    speeds = relay.GetSpeeds(results)

    # plot the distribution of actual speeds
    pmf = Pmf.MakePmfFromList(speeds, 'actual speeds')

    # myplot.Clf()
    # myplot.Hist(pmf)
    # myplot.Save(root='observed_speeds',
    #             title='PMF of running speed',
    #             xlabel='speed (mph)',
    #             ylabel='probability')

    # plot the biased distribution seen by the observer
    biased = BiasPmf(pmf, 7.5, name='observed speeds')

    myplot.Clf()
    myplot.Hist(biased)
    myplot.Save(root='observed_speeds',
                title='PMF of running speed',
                xlabel='speed (mph)',
                ylabel='probability')

    cdf = Cdf.MakeCdfFromPmf(biased)

    myplot.Clf()
    myplot.Cdf(cdf)
    # Save, not the lower-case `show`: a `root` is given, and every other
    # figure in this code is written with myplot.Save(root=...)
    myplot.Save(root='observed_speeds_cdf',
                title='CDF of running speed',
                xlabel='speed (mph)',
                ylabel='cumulative probability')
示例#4
0
def main():
    """Print height statistics and show height plots for two respondent groups.

    Loads the respondent records from 'res', takes groups 1 ('man') and
    2 ('lady') of the height summary, prints each group's trimmed mean,
    variance, standard deviation and coefficient of variation, and then shows
    each group's histogram and PMF followed by both CDFs on one figure.
    """
    respondents = brfss.Respondents()
    respondents.ReadRecords(data_dir='res')
    heights = respondents.SummarizeHeight()

    men = heights[1]
    women = heights[2]

    def show_then_clear():
        # display the current figure, then start from an empty one
        myplot.Show()
        myplot.Clf()

    # men: mu, var, sigma and coefficient of variation (CV)
    mu_m, var_m = thinkstats.TrimmedMeanVar(men)
    sigma_m = math.sqrt(var_m)
    cv_m = sigma_m / mu_m
    print("man: mu = %.3f, var = %.3f, sigma = %.3f, cv = %.3f" % (mu_m, var_m, sigma_m, cv_m))

    # women: mu, var, sigma and coefficient of variation (CV)
    mu_w, var_w = thinkstats.TrimmedMeanVar(women)
    sigma_w = math.sqrt(var_w)
    cv_w = sigma_w / mu_w
    print("lady: mu = %.3f, var = %.3f, sigma = %.3f, cv = %.3f" % (mu_w, var_w, sigma_w, cv_w))

    # histograms, one figure per group
    hist_m = Pmf.MakeHistFromList(men, name='man hist')
    myplot.Hist(hist_m)
    show_then_clear()

    hist_w = Pmf.MakeHistFromList(women, name='lady hist')
    myplot.Hist(hist_w)
    show_then_clear()

    # PMFs, one figure per group
    pmf_m = Pmf.MakePmfFromHist(hist_m, name='man pmf')
    myplot.Pmf(pmf_m)
    show_then_clear()

    pmf_w = Pmf.MakePmfFromHist(hist_w, name='lady pmf')
    myplot.Pmf(pmf_w)
    show_then_clear()

    # cumulative distributions of both groups on a single figure
    cdf_m = Cdf.MakeCdfFromPmf(pmf_m, name='man cdf')
    cdf_w = Cdf.MakeCdfFromPmf(pmf_w, name='lady cdf')
    myplot.Cdfs((cdf_m, cdf_w), complement=False, transform=None)
    myplot.Show()
示例#5
0
def process(data):
    """Show the histogram, PMF and CDF of data, with normal-CDF points on the CDF.

    data: sequence of numbers

    Prints the mean and standard deviation of data, then draws values with
    ``normal_sample(len(data), mu, sigma)`` and scatters their normal CDF
    values (same mu and sigma) over the empirical CDF.
    """
    # histogram
    histogram = Pmf.MakeHistFromList(data, name='hist')
    myplot.Hist(histogram, color='blue')
    myplot.Show()

    # PMF
    distribution = Pmf.MakePmfFromHist(histogram, name='pmf')
    myplot.Pmf(distribution, color='yellow')
    myplot.Show()

    myplot.Clf()

    # CDF of the actual data
    empirical_cdf = Cdf.MakeCdfFromList(data, name='loafs')
    myplot.Cdf(empirical_cdf)

    mu, var = thinkstats.MeanVar(data)
    sigma = math.sqrt(var)
    print("mu = %.3f, sigma = %.3f" % (mu, sigma))

    # normal model evaluated at sampled points (the data itself is not used as xs)
    sample_xs = normal_sample(len(data), mu, sigma)
    model_ys = []
    for x in sample_xs:
        model_ys.append(erf.NormalCdf(x, mu=mu, sigma=sigma))
    myplot.Scatter(sample_xs, model_ys, color='red', label='sample')
    myplot.Show()
def PlotAges(resp):
    """Show the CDF of the ``age`` attribute over ``resp.records``."""
    age_cdf = Cdf.MakeCdfFromList([record.age for record in resp.records])

    myplot.Clf()
    myplot.Cdf(age_cdf)
    myplot.Show()
示例#7
0
def main():
    """Show the complementary CDF of ``paretovariate(1000, 1.7, 100)`` on log-log axes."""
    sz = 1000
    alph = 1.7
    exem = 100

    sample_cdf = Cdf.MakeCdfFromList(paretovariate(sz, alph, exem))

    myplot.Clf()
    myplot.Cdf(sample_cdf, complement=True, xscale='log', yscale='log')
    figure_title = 'CCDF of {0} random paretovariates'.format(sz)
    myplot.Show(title=figure_title)
示例#8
0
def MakeCdfs(lens):
    """Save the CDF (log-x) and the complementary CDF (log-log) of lens.

    lens: sequence of values; the CDF built from it is named 'slashdot'

    Writes two figures, 'slashdot.logx' and 'slashdot.loglog'.
    """
    cdf = Cdf.MakeCdfFromList(lens, 'slashdot')

    myplot.Clf()
    myplot.Cdf(cdf)
    myplot.Save(root='slashdot.logx',
                xlabel='Number of friends/foes',
                ylabel='CDF',
                xscale='log')

    myplot.Clf()
    myplot.Cdf(cdf, complement=True)
    # this figure plots the complement, so the y axis is labelled accordingly
    myplot.Save(root='slashdot.loglog',
                xlabel='Number of friends/foes',
                ylabel='Complementary CDF',
                xscale='log',
                yscale='log')
示例#9
0
def MakeFigures(pool, firsts, others):
    """Save the age/weight CDF figures and an age-vs-weight hexbin figure.

    pool, firsts, others: objects exposing ``age_cdf`` and ``weight_cdf``;
        pool is also passed to GetAgeWeight for the hexbin plot
    """
    age_title = "Distribution of mother's age"
    weight_title = "Distribution of birth weight"

    def save_cdf_figure(root, title, xlabel, **extra):
        # every CDF figure here shares the same y label
        myplot.Save(root=root, title=title, xlabel=xlabel, ylabel='CDF', **extra)

    # pooled CDF of age
    myplot.Clf()
    myplot.Cdf(pool.age_cdf)
    save_cdf_figure('agemodel_age_cdf', age_title, 'age (years)', legend=False)

    # pooled CDF of weight
    myplot.Clf()
    myplot.Cdf(pool.weight_cdf)
    save_cdf_figure('agemodel_weight_cdf', weight_title, 'birth weight (oz)',
                    legend=False)

    # age CDFs of first babies and others on one figure
    myplot.Clf()
    myplot.Cdfs([firsts.age_cdf, others.age_cdf])
    save_cdf_figure('agemodel_age_cdfs', age_title, 'age (years)')

    # weight CDFs of first babies and others on one figure
    myplot.Clf()
    myplot.Cdfs([firsts.weight_cdf, others.weight_cdf])
    save_cdf_figure('agemodel_weight_cdfs', weight_title, 'birth weight (oz)')

    # hexbin of ages against weights, drawn through pyplot directly
    # (alternative kept for reference: pyplot.scatter(ages, weights, alpha=0.2))
    ages, weights = GetAgeWeight(pool)
    pyplot.clf()
    pyplot.hexbin(ages, weights, cmap=matplotlib.cm.gray_r)
    scatter_options = dict(xlabel='Age (years)',
                           ylabel='Birth weight (oz)',
                           legend=False)
    myplot.Save(root='agemodel_scatter', **scatter_options)
示例#10
0
def Resample(cdf, n=10000):
    """Save a figure comparing cdf with the CDF of a sample drawn from it.

    cdf: object providing ``Sample(n)``
    n: number of values to draw
    """
    resampled = Cdf.MakeCdfFromList(cdf.Sample(n), 'resampled')

    myplot.Clf()
    myplot.Cdfs([cdf, resampled])

    labels = dict(title='CDF', xlabel='weight in oz', ylabel='CDF(x)')
    myplot.Save(root='resample_cdf', **labels)
示例#11
0
def main():
    """Show the PMF, then the CDF, of 1000 values of ``100 * random.random()``."""
    # called `values` rather than `list` so the builtin is not shadowed
    values = [100 * random.random() for _ in range(1000)]
    pmf = Pmf.MakePmfFromList(values, name='pfm')
    cdf = Cdf.MakeCdfFromList(values, name='cdf')
    myplot.Pmf(pmf)
    myplot.Show()
    myplot.Clf()
    myplot.Cdf(cdf)
    myplot.Show()
示例#12
0
def main():
    """Show a histogram of the relay speeds after biasing with ``BiasPmf(pmf, 7.5)``."""
    speeds = relay.GetSpeeds(relay.ReadResults())
    actual = Pmf.MakePmfFromList(speeds, 'actual speeds')

    biased = BiasPmf(actual, 7.5, 'observed speeds')

    myplot.Clf()
    myplot.Hist(biased)
    labels = dict(title='observed speeds',
                  xlabel='speed (mph)',
                  ylabel='probability')
    myplot.Show(**labels)
示例#13
0
def main(script, *args):
    """Save the income CDF on a log-x scale and its complement on log-log axes.

    script, args: accepted but not used by this function
    """
    _hist, _pmf, income_cdf = MakeIncomeDist(ReadIncomeFile())

    # CDF with a logarithmic x axis
    myplot.Clf()
    myplot.Cdf(income_cdf)
    logx_options = dict(xscale='log', xlabel='income', ylabel='CDF')
    myplot.Save(root='income_logx', **logx_options)

    # complementary CDF with both axes logarithmic
    myplot.Clf()
    myplot.Cdf(income_cdf, complement=True)
    loglog_options = dict(complement=True,
                          xscale='log',
                          yscale='log',
                          xlabel='income',
                          ylabel='complementary CDF')
    myplot.Save(root='income_loglog', **loglog_options)
示例#14
0
def MakeExample():
    """Save the CDF of the list [2, 1, 3, 2, 5] as 'example_cdf'."""
    example_cdf = Cdf.MakeCdfFromList([2, 1, 3, 2, 5])

    myplot.Clf()
    myplot.Cdf(example_cdf)

    options = dict(title='CDF',
                   xlabel='x',
                   ylabel='CDF(x)',
                   axis=[0, 6, 0, 1],
                   legend=False)
    myplot.Save(root='example_cdf', **options)
示例#15
0
def MakeFigures(pool, firsts, others):
    """Save PMF and CDF figures of birth weight for first babies and others.

    pool: not used by this function
    firsts, others: objects exposing ``weight_pmf`` and ``weight_cdf``
    """
    # first babies are drawn in blue, the others in orange
    groups = ((firsts, 'blue'), (others, 'orange'))

    # birth-weight PMFs
    myplot.Clf()
    for group, color in groups:
        myplot.Hist(group.weight_pmf, linewidth=0, color=color)
    myplot.Save(root='nsfg_birthwgt_pmf',
                title='Birth weight PMF',
                xlabel='weight (ounces)',
                ylabel='probability')

    # birth-weight CDFs
    myplot.Clf()
    for group, color in groups:
        myplot.Cdf(group.weight_cdf, linewidth=2, color=color)
    myplot.Save(root='nsfg_birthwgt_cdf',
                title='Birth weight CDF',
                xlabel='weight (ounces)',
                ylabel='probability',
                axis=[0, 200, 0, 1])
示例#16
0
def MakeFigures(exam, alice, bob):
    """Save PNG figures of ``exam.prior`` and of the alice and bob PMFs.

    exam: object exposing ``prior``
    alice, bob: PMFs plotted together in the 'sat_posterior' figure
    """
    common = dict(formats=['png'], xlabel='p', ylabel='PMF')

    myplot.Pmf(exam.prior, label='prior')
    myplot.Save(root='sat_prior', **common)

    myplot.Clf()
    myplot.Pmfs([alice, bob])
    myplot.Save(root='sat_posterior', **common)
def MakeFigures():
    """Save CDF figures of the population data and a normal plot of its logs.

    Reads the populations, prints how many there are, and writes three
    figures: 'populations' (linear axes), 'populations_logx' (log x) and
    'populations_loglog' (complementary CDF, log-log).  Finally passes the
    sorted natural logs of the populations to rankit.MakeNormalPlot.
    """
    pops = populations.ReadData()
    # function-call form is valid on both Python 2 and Python 3
    print(len(pops))

    cdf = Cdf.MakeCdfFromList(pops, 'populations')

    myplot.Clf()
    myplot.Cdf(cdf)
    myplot.Save(root='populations',
                title='City/Town Populations',
                xlabel='population',
                ylabel='CDF',
                legend=False)

    myplot.Clf()
    myplot.Cdf(cdf)
    myplot.Save(root='populations_logx',
                title='City/Town Populations',
                xlabel='population',
                ylabel='CDF',
                xscale='log',
                legend=False)

    myplot.Clf()
    myplot.Cdf(cdf, complement=True)
    myplot.Save(root='populations_loglog',
                title='City/Town Populations',
                xlabel='population',
                ylabel='Complementary CDF',
                yscale='log',
                xscale='log',
                legend=False)

    t = sorted(math.log(x) for x in pops)
    rankit.MakeNormalPlot(t, 'populations_rankit')
def PlotCdfs(samples):
    """Save a figure with one CDF per sample, using only values below 150.

    samples: map from label to a sequence of values; each label names its CDF
    """
    cdfs = []
    # items() works on Python 2 and 3; iteritems() exists only on Python 2
    for label, sample in samples.items():
        outliers = [x for x in sample if x < 150]
        cdfs.append(Cdf.MakeCdfFromList(outliers, label))

    myplot.Clf()
    myplot.Cdfs(cdfs)
    myplot.Save(root='bayes_height_cdfs',
                title='CDF of height',
                xlabel='Reported height (cm)',
                ylabel='CDF')
示例#19
0
def MakeFigures(pmf, biased_pmf):
    """Print summary values and save a log-x figure of both CDFs.

    pmf: PMF converted to the CDF named 'unbiased'
    biased_pmf: PMF converted to the CDF named 'biased'

    Prints the 50th percentile of each CDF, plus ``Prob(100)`` and
    ``Prob(1000)`` of the unbiased one.
    """
    unbiased_cdf = Cdf.MakeCdfFromPmf(pmf, 'unbiased')
    print('unbiased median', unbiased_cdf.Percentile(50))
    print('percent < 100', unbiased_cdf.Prob(100))
    print('percent < 1000', unbiased_cdf.Prob(1000))

    skewed_cdf = Cdf.MakeCdfFromPmf(biased_pmf, 'biased')
    print('biased median', skewed_cdf.Percentile(50))

    myplot.Clf()
    myplot.Cdfs([unbiased_cdf, skewed_cdf])
    options = dict(xlabel='Number of friends/foes',
                   ylabel='CDF',
                   xscale='log')
    myplot.Save(root='slashdot.logx', **options)
示例#20
0
def main():
    """Show the CDF, then the log-y complementary CDF, of gaps in ``minutes``.

    Each gap is a record's ``minutes`` minus the previous record's; the first
    record is measured from 0.
    """
    babies = BabyBoom.Babies()
    babies.ReadRecords(data_dir='res', n=None)

    minutes = [record.minutes for record in babies.records]
    # pair every value with its predecessor, using 0 before the first record
    gaps = [later - earlier for earlier, later in zip([0] + minutes, minutes)]

    gap_cdf = Cdf.MakeCdfFromList(gaps, name='baby interval')
    myplot.Cdf(gap_cdf, complement=False, transform=None)
    myplot.Show()

    # log scale on the y axis of the CCDF, where CCDF(x) = 1 - CDF(x)
    myplot.Clf()
    myplot.Cdf(gap_cdf, complement=True, yscale='log')
    myplot.Show()
示例#21
0
def CmpNormalModelWithDataSample():
    """Show the empirical CDF of baby weights against a normal-model CDF.

    The model uses the mean and standard deviation of the weight PMF and is
    evaluated with erf.NormalCdf at the x values the PMF renders.  The mean,
    variance and standard deviation are printed first.
    """
    firsts, others, babies = Babies.PartitionBabies()
    weights = Babies.GetWightList(babies)
    pmf = Pmf.MakePmfFromList(weights)
    mu = pmf.Mean()
    var = pmf.Var(mu)
    sigma = math.sqrt(var)
    print("mu = {}, var = {}, sigma = {}".format(mu, var, sigma))

    # empirical distribution of the data
    cdf = Cdf.MakeCdfFromPmf(pmf, name='data')
    # the plotting function is `Cdf`; the lower-case `cdf` used before is not
    # the name the rest of this code calls on myplot
    myplot.Cdf(cdf)

    # model: normal CDF computed from mu and sigma via the error function
    xs, _ = pmf.Render()
    ys = [erf.NormalCdf(x, mu, sigma) for x in xs]
    myplot.Plot(xs, ys, label='Model')
    myplot.Show()
    myplot.Clf()
示例#22
0
def MakeFigure(xmin=100, alpha=1.7, mu=150, sigma=25):
    """Save a figure comparing a Pareto sample's CDF with a normal sample's CDF.

    Each sample holds 10000 values; the figure is written as 'pareto_world2'.

    xmin: scale applied to each random.paretovariate(alpha) value
    alpha: parameter passed to random.paretovariate
    mu: first parameter passed to random.normalvariate
    sigma: second parameter passed to random.normalvariate
    """
    count = 10000

    # the Pareto values are all drawn before any normal values
    pareto_values = [xmin * random.paretovariate(alpha) for _ in range(count)]
    pareto_cdf = Cdf.MakeCdfFromList(pareto_values, name='pareto')

    normal_values = [random.normalvariate(mu, sigma) for _ in range(count)]
    normal_cdf = Cdf.MakeCdfFromList(normal_values, name='normal')

    myplot.Clf()
    myplot.Cdfs([pareto_cdf, normal_cdf])
    labels = dict(title='Pareto World',
                  xlabel='height (cm)',
                  ylabel='CDF')
    myplot.Save(root='pareto_world2', **labels)
示例#23
0
# Example 3-10
#
# Generate `size` values with random.random(), then show their PMF and their
# CDF in two separate figures.

import random, Pmf, Cdf, myplot

# number of random values to generate
size = 10000
lst = [random.random() for i in range(size)]
# PMF and CDF built from the same list of values
lst_pmf = Pmf.MakePmfFromList(lst)
lst_cdf = Cdf.MakeCdfFromList(lst)

# first figure: the PMF
myplot.Clf()
myplot.Pmf(lst_pmf)
myplot.Show(title='PMF of {0} randoms'.format(size))

# second figure: the CDF
myplot.Clf()
myplot.Cdf(lst_cdf)
myplot.Show(title='CDF of {0} randoms'.format(size))

# yes, the distribution is uniform