示例#1
0
def main():
    """Compare two teams' scoring rates using the 2014 World Cup final.

    Builds a Soccer suite per team over a grid of hypothetical rates,
    updates each with its result in the final, prints the posterior
    means, and shows the posterior and predictive distributions.
    """
    rates = numpy.linspace(0, 12, 201)

    # The prior is shaped by a pseudo-observation (0.34), chosen by the
    # original author to yield the right prior mean.
    germany = Soccer(rates, label='Germany')
    germany.Update(0.34)
    argentina = germany.Copy(label='Argentina')

    # Results of the World Cup 2014 final.
    germany.Update(1)
    argentina.Update(0)

    print('posterior mean Germany', germany.Mean())
    print('posterior mean Argentina', argentina.Mean())

    # Posterior distributions of both teams.
    posteriors = [germany, argentina]
    thinkplot.PrePlot(2)
    thinkplot.Pdfs(posteriors)
    thinkplot.Show()

    # TODO: compute posterior prob Germany is better than Argentina

    # TODO: compute the Bayes factor of the evidence

    # Predictive distributions for goals scored in a rematch.
    predictions = [
        germany.PredictiveDist(label='Germany'),
        argentina.PredictiveDist(label='Argentina'),
    ]
    thinkplot.PrePlot(2)
    thinkplot.Pdfs(predictions)
    thinkplot.Show()
示例#2
0
def main():
    """Build a User and a Link suite, then update each with an 'up' datum.

    Each suite is plotted and summarised (mean and 90% credible interval)
    before and after its update. The datum passed to each Update pairs
    'up' with the other suite's mean rescaled by 1/100.

    returns: 0
    """

    def show_and_summarize(suite):
        # Plot the suite, then print its mean and 90% credible interval.
        thinkplot.Pdf(suite)
        thinkplot.Show()
        print(suite.Mean(), suite.CredibleInterval(90))

    # Prior for the user: a Beta(2, 1) pmf with its values scaled by 100.
    user = User(label='user')
    for fraction, prob in thinkbayes2.Beta(2, 1).MakePmf().Items():
        user.Set(fraction * 100, prob)
    show_and_summarize(user)
    mean_r = user.Mean() / 100.0

    link = Link(range(0, 101), label='link')
    show_and_summarize(link)
    mean_q = link.Mean() / 100.0

    # Both rescaled means were captured before either update is applied.
    user.Update(('up', mean_q))
    show_and_summarize(user)

    link.Update(('up', mean_r))
    show_and_summarize(link)

    return 0
示例#3
0
def CH7_3(show=1):
    """Compute the posterior distributions for two hockey teams.

    Goals used for the updates, one row per game:

    bruins   canucks
      0         1
      2         3
      8         1
      4         0

    show: when truthy, plot both suites before ('PRE') and after ('POST')
          the updates

    returns: (bruins suite, canucks suite) after updating
    """
    bruins = Hockey('bruins')
    canucks = Hockey('canucks')

    def plot_both(title):
        # Draw both suites on a fresh figure.
        thinkplot.Clf()
        thinkplot.PrePlot(num=2)
        thinkplot.Pmf(bruins)
        thinkplot.Pmf(canucks)
        thinkplot.Show(title=title,
                       xlabel='Goals per game',
                       ylabel='Probability')

    if show:
        plot_both('PRE')

    bruins.UpdateSet([0, 2, 8, 4])
    canucks.UpdateSet([1, 3, 1, 0])

    if show:
        # Inspect the most likely values of lam: the posterior
        # distribution of goals per game.
        plot_both('POST')

    return bruins, canucks
示例#4
0
def CycleExtract(fw, data, pnum, trial, plane, marker, plot, plot2):
    """Slice one trial's data into cycles between successive heel strikes.

    fw: footwear condition, one of 'AFO', 'PPAFO' or 'Shoes'
    data: sequence whose items 0, 1 and 2 hold the AFO, PPAFO and Shoes
          data sets respectively
    pnum: index into the chosen data set
    trial: trial identifier passed to GetTrial (and to HeelStrike)
    plane: identifier passed to GetData
    marker: forwarded to HeelStrike
    plot: forwarded to HeelStrike
    plot2: when equal to True, plot every extracted cycle on top of the
           full 'R_HEEL' and 'L_HEEL' traces

    returns: list of (index, cycle) pairs, where index is the range of row
             indices and cycle is the matching slice of the data frame

    raises: ValueError if fw is not one of the recognised conditions
    """
    # Fail early with a clear message; previously an unknown condition left
    # `choicedata` unbound and surfaced later as an UnboundLocalError.
    if fw == 'AFO':
        choicedata = data[0]
    elif fw == 'PPAFO':
        choicedata = data[1]
    elif fw == 'Shoes':
        choicedata = data[2]
    else:
        raise ValueError(
            "unknown footwear condition %r; expected 'AFO', 'PPAFO' or 'Shoes'"
            % (fw,))

    # Only the strike characteristics are needed here; the second value
    # returned by HeelStrike is ignored.
    strike_charac, _ = HeelStrike(fw, data, pnum, trial, marker, plot)
    num_cycles = len(strike_charac)

    dataframe = choicedata[pnum].GetTrial(trial).GetData(plane)

    cycle_set = []
    for i in range(num_cycles - 1):
        # NOTE(review): the fixed +40 / +50 row offsets come from the
        # original code; their rationale is not visible here.
        start_rowindex = strike_charac[i][0] + 40
        end_rowindex = strike_charac[i + 1][0] + 50

        cycle = dataframe[start_rowindex:end_rowindex]
        index = range(start_rowindex, end_rowindex)
        cycle_set.append((index, cycle))

    # Kept as an equality test so exactly the same values as before
    # enable plotting.
    if plot2 == True:
        for index, cycle in cycle_set:
            thinkplot.Plot(dataframe['R_HEEL'],
                           color='blue',
                           label='Right full set')
            thinkplot.Plot(index,
                           cycle['R_HEEL'],
                           color='red',
                           label='Right cycle set')
            thinkplot.Show(legend=True)

            thinkplot.Plot(dataframe['L_HEEL'],
                           color='blue',
                           label='Left full set')
            thinkplot.Plot(index,
                           cycle['L_HEEL'],
                           color='red',
                           label='Left cycle set')
            thinkplot.Show(legend=True)

    return cycle_set
示例#5
0
def sin_spectrum():
    """Plot the spectrum of a sine note, print its first peak, and invert it.

    Makes a note with thinkdsp.make_note(69, 0.5, SinSignal), plots its
    spectrum, prints the first entry of spectrum.peaks(), rebuilds a wave
    from the spectrum, plots that wave and writes it out via wave.write().
    """
    wave = thinkdsp.make_note(69, 0.5, SinSignal)
    spectrum = wave.spectrum()
    spectrum.plot()
    thinkplot.Show()

    peaks = spectrum.peaks()
    # Single-argument print call: valid on Python 3 and prints the same
    # text as the old Python 2 print statement.
    print(peaks[0])

    wave2 = spectrum.make_wave()

    wave2.plot()
    thinkplot.Show()

    wave2.write()
示例#6
0
def main():
    """Plot the CDF of 'mystery0.dat' in six ways on a 2x3 grid.

    Panels, in order: linear, log-x, exponential transform, normal
    probability plot, pareto transform and weibull transform.
    """
    filename = 'mystery0.dat'
    data = read_file(filename)
    cdf = thinkstats2.MakeCdfFromList(data)

    def transformed_panel(position, title, **cdf_options):
        # The value returned by thinkplot.Cdf is forwarded to Config as
        # keyword options.
        thinkplot.SubPlot(2, 3, position)
        axis_options = thinkplot.Cdf(cdf, **cdf_options)
        thinkplot.Config(title=title, **axis_options)

    # Panel 1: plain CDF; its return value is not forwarded to Config.
    thinkplot.SubPlot(2, 3, 1)
    thinkplot.Cdf(cdf)
    thinkplot.Config(title='linear')

    transformed_panel(2, 'logx', xscale='log')
    transformed_panel(3, 'expo', transform='exponential')

    # Panel 4: normal probability plot of the raw data.
    thinkplot.SubPlot(2, 3, 4)
    xs, ys = thinkstats2.NormalProbability(data)
    thinkplot.Plot(xs, ys)
    thinkplot.Config(title='normal')

    transformed_panel(5, 'pareto', transform='pareto')
    transformed_panel(6, 'weibull', transform='weibull')

    thinkplot.Show()
示例#7
0
def main(script, filename='mystery0.dat'):
    """Plot the CDF of a data file under six different scalings.

    script: unused here (conventionally the script name from argv)
    filename: data file read with ReadFile
    """
    data = ReadFile(filename)
    cdf = thinkstats2.Cdf(data)
    colour = 'C0'

    def draw_scaled(slot, heading, y_text, **cdf_kwargs):
        # Plot the CDF with the given options and pass whatever
        # thinkplot.Cdf returns on to Config as keyword options.
        thinkplot.SubPlot(slot)
        axes = thinkplot.Cdf(cdf, color=colour, **cdf_kwargs)
        thinkplot.Config(title=heading, ylabel=y_text, **axes)

    thinkplot.PrePlot(num=6, rows=2, cols=3)

    # Subplot 1 is the only one that carries a label.
    thinkplot.SubPlot(1)
    thinkplot.Cdf(cdf, color=colour, label=filename)
    thinkplot.Config(title='CDF on linear scale', ylabel='CDF')

    draw_scaled(2, 'CDF on log-x scale', 'CDF', xscale='log')
    draw_scaled(3, 'CCDF on log-y scale', 'log CCDF',
                transform='exponential')

    thinkplot.SubPlot(4)
    xs, ys = thinkstats2.NormalProbability(data)
    thinkplot.Plot(xs, ys, color=colour)
    thinkplot.Config(title='Normal probability plot',
                     xlabel='random normal',
                     ylabel='data')

    draw_scaled(5, 'CCDF on log-log scale', 'log CCDF', transform='pareto')
    draw_scaled(6, 'CCDF on loglog-y log-x scale', 'log log CCDF',
                transform='weibull')

    thinkplot.Show(legend=False)
示例#8
0
def ClassSizes():
    """Compare the actual, biased and unbiased class-size distributions.

    Builds a Pmf from a fixed table of class sizes, derives the biased
    ("observed") Pmf with BiasPmf and recovers an unbiased one with
    UnbiasPmf, printing the mean and variance of each. Only the actual and
    biased Pmfs are plotted.
    """
    # start with the actual distribution of class sizes from the book
    d = {
        7: 8,
        12: 8,
        17: 14,
        22: 4,
        27: 6,
        32: 12,
        37: 8,
        42: 3,
        47: 2,
    }

    # form the pmf
    pmf = thinkstats2.MakePmfFromDict(d, 'actual')
    # print() calls replace Python 2 print statements, which are a
    # SyntaxError on Python 3.
    print('mean', pmf.Mean())
    print('var', pmf.Var())

    # compute the biased pmf
    biased_pmf = BiasPmf(pmf, 'observed')
    print('mean', biased_pmf.Mean())
    print('var', biased_pmf.Var())

    # unbias the biased pmf
    unbiased_pmf = UnbiasPmf(biased_pmf, 'unbiased')
    print('mean', unbiased_pmf.Mean())
    print('var', unbiased_pmf.Var())

    # plot the Pmfs
    thinkplot.Pmfs([pmf, biased_pmf])
    thinkplot.Show(xlabel='Class size', ylabel='PMF')
示例#9
0
def _plot_detector_suites(k, f):
    """Plot Detector suites for a range of hypothetical r.

    k: datum passed to each suite's Update
    f: second argument passed to Detector
    (presumably the observed count and the detection fraction — confirm
    against Detector)
    """
    thinkplot.PrePlot(num=3)
    for r in [100, 250, 400]:
        suite = Detector(r, f, step=1)
        suite.Update(k)
        thinkplot.Pmf(suite)
        print(suite.MaximumLikelihood())

    thinkplot.Show(xlabel='Number of particles (n)', ylabel='PMF')


def _plot_emitter_posteriors(k, f):
    """Plot the posterior distributions of r and n and save as 'jaynes2'.

    Not called by main(): in the original this code sat after an
    unconditional return and never ran. It is kept as a helper so it can
    be re-enabled deliberately.
    """
    hypos = range(1, 501, 5)
    suite = Emitter2(hypos, f=f)
    suite.Update(k)

    thinkplot.PrePlot(num=2)
    post_r = suite.DistOfR(name='posterior r')
    post_n = suite.DistOfN(name='posterior n')

    thinkplot.Pmf(post_r)
    thinkplot.Pmf(post_n)

    thinkplot.Save(root='jaynes2',
                   xlabel='Emission rate',
                   ylabel='PMF',
                   formats=FORMATS)


def main():
    """Plot Detector suites for several hypothetical rates, with k=15, f=0.1."""
    k = 15
    f = 0.1

    _plot_detector_suites(k, f)
def scatter(x):
    """Scatter-plot column `x` of the module-level `df` against total crimes.

    x: column name in `df`. For 'month' only the plot is shown; for any
       other column the plot is followed by printed Spearman correlation
       and covariance against df.Total_crimes.
    """
    totals = df.Total_crimes
    thinkplot.Scatter(df[x], totals, alpha=.5)

    if x == 'month':
        thinkplot.Show(title="Total Crimes vs Time",
                       xlabel="Year",
                       ylabel="Total Crimes")
        return

    axis_text = x + " Crimes"
    thinkplot.Show(title="Total Crimes vs " + axis_text,
                   xlabel=axis_text,
                   ylabel="Total Crimes")

    print(x + " crime stats")
    print("Spearman's correlation:",
          thinkstats2.SpearmanCorr(totals, df[x]))
    print("Covariance:", thinkstats2.Cov(totals, df[x]))
    print()
示例#11
0
def main():
    """Update a Volunteer suite with two samples and plot the q and r marginals.

    Hypotheses are all (q, r) pairs on a 101x101 grid over [0, 1].
    """
    grid = numpy.linspace(0, 1, 101)
    hypos = [(q, r) for q in grid for r in grid]

    suite = Volunteer(hypos)

    # The larger sample of students who signed up and reported.
    suite.Update((140, 50))

    # The smaller sample of students who signed up, participated, and
    # reported.
    suite.Update((5, 3, 1))

    # A MarginalProduct(suite) plot was disabled in the original and
    # remains disabled here.
    q_marginal = MarginalDistribution(suite, 0)
    r_marginal = MarginalDistribution(suite, 1)

    thinkplot.Pmf(q_marginal, label='q')
    thinkplot.Pmf(r_marginal, label='r')
    thinkplot.Show()
示例#12
0
def main():
    """Print the mean of a Version3 suite and show it as a PDF, no legend."""
    version3 = Version3()
    print(version3.Mean())

    thinkplot.Pdf(version3)
    thinkplot.Show(legend=False)
示例#13
0
def main():
    """Print the mean of a Version3 suite and show its PMF."""
    suite = Version3()
    # Single-argument print call: valid on Python 3 and prints the same
    # text as the old Python 2 print statement.
    print(suite.Mean())

    thinkplot.Pmf(suite)
    thinkplot.Show()
示例#14
0
def main():
    """Print summary statistics of the interpolated log-income sample.

    Reads the data with hinc.ReadData, interpolates a sample with
    log_upper=6.0, prints the median, two skewness measures, the mean and
    the share of the sample below the mean, then plots the CDF.
    """
    income_df = hinc.ReadData()
    sample = InterpolateSample(income_df, log_upper=6.0)
    cdf = thinkstats2.Cdf(sample)

    # Location and skewness statistics.
    print("median", thinkstats2.Median(sample))
    print("pearson's median skewness",
          thinkstats2.PearsonMedianSkewness(sample))
    print("skewness", thinkstats2.Skewness(sample))
    print("mean", cdf.Mean())

    # The original author's observations about the effect of log_upper.
    print(
        "the higher our log_upper, the more right-skewed (according to g_1) or at least less left-skewed (according to g_p) things get"
    )
    print("the mean moves to the right a bit, too.")

    print("proportion of the population with income < mean",
          cdf.Prob(cdf.Mean()))
    print(
        "the higher the upper bound, the greater the proprtion below the mean."
    )

    thinkplot.Cdf(cdf)
    thinkplot.Show(xlabel='household income', ylabel='CDF')
示例#15
0
def main():
    """Update a Euro suite over hypotheses 0..100 with one 'H' and plot it."""
    hypotheses = range(0, 101)
    euro = Euro(hypotheses)
    euro.Update('H')

    thinkplot.Pdf(euro)
    thinkplot.Show(xlabel='x', ylabel='Probability', legend=False)
示例#16
0
def Specific_Character(House, Gender, Class, ksweep, lamsweep, Title=''):
    """Produce a survival prediction for a given house, gender and class.

    Chains the data preparation, posterior computation and plotting
    helpers together.

    House: any key in the data returned by PrepData
    Gender: 'M', 'F' or 'All'
    Class: 'Noble', 'Small' or 'All'
    ksweep: [lower limit, upper limit, number of points] for the uniform
            prior on k
    lamsweep: [lower limit, upper limit, number of points] for the uniform
              prior on lambda
    Title: optional title passed on to the credible-interval plot
    """
    hd = PrepData()  # Get the data
    # Sort characters into alive/dead for the given attributes.
    alive, dead = char_lists(hd, House, Gender, Class)
    introductions, lifetimes = ages(alive, dead)  # Ages and lifespans
    # Per the original comment this uses Kaplan-Meier; only the survival
    # function is needed here, the hazard is discarded.
    sf, _ = SurvivalHaz(introductions, lifetimes)

    # Uniform priors over the requested sweeps.
    lam = thinkbayes2.MakeUniformPmf(lamsweep[0], lamsweep[1], lamsweep[2])
    k = thinkbayes2.MakeUniformPmf(ksweep[0], ksweep[1], ksweep[2])
    k, lam = MakeDistr(introductions, lifetimes, k, lam)  # Posteriors

    thinkplot.PrePlot(2)
    thinkplot.Pdfs([k, lam])
    plt.xlabel('Value')
    plt.ylabel('Probability')
    plt.title('Posterior Distributions')
    # The hint now names the real parameters (it used to say
    # "kweep and lsweep", which do not exist).
    print('If these distributions look chopped off, adjust ksweep and lamsweep')
    thinkplot.Show()

    # Posterior means and 5th/95th percentiles feed the credible-interval
    # plot.
    mk = k.Mean()
    ml = lam.Mean()
    kl, kh = k.Percentile(5), k.Percentile(95)
    ll, lh = lam.Percentile(5), lam.Percentile(95)
    CredIntPlt(sf, kl, kh, ll, lh, House, mk, ml, Title)
    plt.show()
示例#17
0
def CH5_5():
    """Maximum of dice rolls, computed three ways.

    Roll three 6-sided dice and compute the distribution of their maximum
    using the three approaches below, then compare the plots:

    - simulation
    - enumeration
    - exponentiation (via the die's Max method)
    """
    die = Die(6)
    num_dice = 3

    # Simulation.
    samples = 1000
    simulated = SampleMax([die] * num_dice, samples)
    simulated.name = 'sim'
    thinkplot.Pmf(simulated)

    # Enumeration (original note: km^2), applied pairwise.
    enumerated = PmfMax(die, die)
    print("pmf1.Total() = %.3f" % enumerated.Total())
    enumerated = PmfMax(enumerated, die)
    print("pmf2.Total() = %.3f" % enumerated.Total())
    enumerated.name = 'enum'
    thinkplot.Pmf(enumerated)

    # CDF (exponentiation max). TODO from the original author: this step
    # is not fully understood.
    exponentiated = die.Max(num_dice)
    exponentiated.name = "expo"
    thinkplot.Cdf(exponentiated)

    thinkplot.Show(xlabel='max([d6]*3)', ylabel='probablity')
示例#18
0
def CH5_4():
    """Sum of dice rolls, computed two ways.

    Roll three 6-sided dice and compute the distribution of their sum
    using the two approaches below, then compare the plots:

    - simulation: draw random samples and accumulate their sums
    - enumeration: enumerate all possible pairs of values
    """
    die = Die(6)
    num_dice = 3
    summary = "mean([d6]*3) = %.3f, sum(*) = %.3f"
    print("mean(d6) = %.3f, sum(probs) = %.3f" % (die.Mean(), die.Total()))

    # Simulation: distribution for 3 dice. Per the original note, a larger
    # sample count is more accurate, at the cost of run time.
    samples = 1000
    simulated = thinkbayes.SampleSum([die] * num_dice, samples)
    simulated.name = 'sim'
    thinkplot.Pmf(simulated)
    print(summary % (simulated.Mean(), simulated.Total()))

    # Enumeration: values are added, probabilities are multiplied.
    enumerated = die + die + die
    enumerated.name = 'enum'
    thinkplot.Pmf(enumerated)
    thinkplot.Show(xlabel='sum([d6]*3)', ylabel='probablity')
    print(summary % (enumerated.Mean(), enumerated.Total()))
示例#19
0
def ProcessScoresTeamwise(pairs):
    """Average number of goals for each team.

    Plots the CDF of the per-team averages and prints their mean and
    standard deviation.

    pairs: map from (team1, team2) to a sequence of (score1, score2)
           entries
    """
    # map from team to list of goals scored
    goals_scored = {}
    # items() works on both Python 2 and 3; iteritems() is Python 2 only.
    for (t1, t2), entries in pairs.items():
        for g1, g2 in entries:
            goals_scored.setdefault(t1, []).append(g1)
            goals_scored.setdefault(t2, []).append(g2)

    # make a list of average goals scored (one entry per team)
    lams = [thinkbayes2.Mean(goals) for goals in goals_scored.values()]

    # make the distribution of average goals scored
    cdf = thinkbayes2.MakeCdfFromList(lams)
    thinkplot.Cdf(cdf)
    thinkplot.Show()

    mu, var = thinkbayes2.MeanVar(lams)
    print('mu, sig', mu, math.sqrt(var))
示例#20
0
def CH7_2():
    """Poisson versus exponential distributions.

    http://www.ruanyifeng.com/blog/2015/06/poisson-distribution.html

    1. If a game averages lam goals, the number of goals per game follows
       a Poisson distribution (a goal can happen at any moment).
       E.g. a hospital averages 3 births per hour.
       The focus is the count.

    2. The time between goals follows an exponential distribution.
       E.g. the interval between births at a hospital (one every
       20 minutes, 0.3h).
       The focus is the interval.

    The Poisson distribution describes the number of independent events
    per unit time; the exponential distribution describes the time between
    independent events.
    """
    # Poisson pmf for the births-per-unit-time example. It is drawn but
    # not displayed: the Show() call after it was disabled in the
    # original, so the next Clf() discards it.
    poisson = thinkbayes.MakePoissonPmf(3, 10, step=1)
    thinkplot.Clf()
    thinkplot.Pmf(poisson)

    # Exponential pmf for the interval-between-births example.
    exponential = thinkbayes.MakeExponentialPmf(0.3, 10, n=200)
    thinkplot.Clf()
    thinkplot.Pmf(exponential)
    thinkplot.Show()
示例#21
0
def print_num_albums_per_artist(all_genres):
    """Print album-count statistics per artist and plot their histogram.

    all_genres: mapping from artist to a sized collection of that
                artist's albums; may be empty
    """
    num_albums_list = [len(albums) for albums in all_genres.values()]

    # map from number of albums to how many artists have that many
    num_albums_counts = {}
    for count in num_albums_list:
        num_albums_counts[count] = num_albums_counts.get(count, 0) + 1

    num_artists = len(all_genres)
    num_albums = sum(num_albums_list)
    print("In total,", num_artists, "artists, producing", num_albums,
          "albums.")
    # Guard against an empty mapping, which used to raise
    # ZeroDivisionError.
    average = num_albums / num_artists if num_artists else 0.0
    print("An average of", "%.2f" % average, "albums per artist.")

    num_albums_hist = ts2.Hist(num_albums_counts)
    artists_more_than_6_albums = sum(
        v for k, v in num_albums_hist.Items() if k > 6)

    print(artists_more_than_6_albums, 'artists with more than 6 albums.')

    tp.Hist(num_albums_hist)
    tp.Show(xlabel='Number of albums',
            ylabel='Count of artists with this number of albums',
            title='Histogram of the number of albums per artist')
示例#22
0
 def MakePlot(self):
     """Plot the CDFs of pmf_y, prior_zb, post_zb and pmf_mean_zb together."""
     # Attributes are looked up one at a time, in plotting order.
     for attribute in ('pmf_y', 'prior_zb', 'post_zb', 'pmf_mean_zb'):
         distribution = getattr(self, attribute)
         thinkplot.Cdf(distribution.MakeCdf())
     thinkplot.Show()
示例#23
0
def main():
    """Plot the CDF of the interpolated log-income sample (log_upper=6.0)."""
    income_df = hinc.ReadData()
    sample = InterpolateSample(income_df, log_upper=6.0)

    thinkplot.Cdf(thinkstats2.Cdf(sample))
    thinkplot.Show(xlabel='household income', ylabel='CDF')
示例#24
0
def main():
    """Show four views of 'mystery0.dat' on a 2x2 grid.

    Panels, in order: the naive Pmf as a histogram, a histogram of binned
    data, a kernel-density estimate rendered as a Pmf, and the CDF.
    """
    filename = 'mystery0.dat'
    data = read_file(filename)

    pmf = thinkstats2.MakePmfFromList(data)
    cdf = thinkstats2.MakeCdfFromList(data)

    # Estimated density evaluated at 101 points spanning the data range.
    pdf = thinkstats2.EstimatedPdf(data)
    low, high = min(data), max(data)
    kde_pmf = pdf.MakePmf(numpy.linspace(low, high, 101))

    bin_pmf = thinkstats2.MakePmfFromList(BinData(data, low, high, 51))

    # (title, drawing action) for each panel, in grid order.
    panels = [
        ('Naive Pmf', lambda: thinkplot.Hist(pmf, width=0.1)),
        ('Binned Hist', lambda: thinkplot.Hist(bin_pmf)),
        ('KDE PDF', lambda: thinkplot.Pmf(kde_pmf)),
        ('CDF', lambda: thinkplot.Cdf(cdf)),
    ]
    for position, (title, draw) in enumerate(panels, start=1):
        thinkplot.SubPlot(2, 2, position)
        draw()
        thinkplot.Config(title=title)

    thinkplot.Show()
def main():
    """Update a Soccer suite with interarrival times and plot each stage.

    The suite is updated three times (mean interarrival time, then 11,
    then 12); after each update it is plotted and its mean printed.
    Finally PredRemaining is called for the rest of the game.
    """
    suite = Soccer(numpy.linspace(0, 12, 201))

    # the mean number of goals per game was 2.67
    mean_rate = 2.67 / 2
    mean_interarrival = 90 / mean_rate

    # (datum, curve label, printed message); the first step builds the
    # prior from the mean interarrival time.
    stages = [
        (mean_interarrival, 'prior', 'prior mean'),
        (11, 'posterior 1', 'after one goal'),
        (12, 'posterior 2', 'after two goals'),
    ]
    for datum, curve_label, message in stages:
        suite.Update(datum)
        thinkplot.Pdf(suite, label=curve_label)
        print(message, suite.Mean())

    thinkplot.Show()

    # plot the predictive distribution
    suite.PredRemaining(90 - 23, 2)
示例#26
0
def ProcessScoresPairwise(pairs):
    """Average number of goals for each team against each opponent.

    Plots the CDF of the per-matchup averages (matchups with fewer than
    three recorded scores are skipped), prints their mean and standard
    deviation, and prints the entries for ('BOS', 'VAN').

    pairs: map from (team1, team2) to a sequence of (score1, score2)
           entries; must contain the key ('BOS', 'VAN')
    """
    # map from (team1, team2) to list of goals scored
    goals_scored = {}
    # items() works on both Python 2 and 3; iteritems() is Python 2 only.
    for (t1, t2), entries in pairs.items():
        for g1, g2 in entries:
            goals_scored.setdefault((t1, t2), []).append(g1)
            goals_scored.setdefault((t2, t1), []).append(g2)

    # make a list of average goals scored, ignoring thin samples
    lams = [thinkstats.Mean(goals)
            for goals in goals_scored.values()
            if len(goals) >= 3]

    # make the distribution of average goals scored
    cdf = thinkbayes.MakeCdfFromList(lams)
    thinkplot.Cdf(cdf)
    thinkplot.Show()

    mu, var = thinkstats.MeanVar(lams)
    print('mu, sig', mu, math.sqrt(var))

    print('BOS v VAN', pairs['BOS', 'VAN'])
def main():
    """Update a Gps joint distribution with readings and plot its marginals.

    Hypotheses are all coordinate pairs on a 101x101 grid over
    [-100, 100]. After the updates the two marginals are plotted and
    their mean and standard deviation printed.
    """
    grid = numpy.linspace(-100, 100, 101)
    joint = Gps(product(grid, grid))

    # Two individual readings, then a batch of eight.
    joint.Update((51, -15))
    joint.Update((48, 90))

    readings = [
        (11.903060613102866, 19.79168669735705),
        (77.10743601503178, 39.87062906535289),
        (80.16596823095534, -12.797927542984425),
        (67.38157493119053, 83.52841028148538),
        (89.43965206875271, 20.52141889230797),
        (58.794021026248245, 30.23054016065644),
        (2.5844401241265302, 51.012041625783766),
        (45.58108994142448, 3.5718287379754585),
    ]
    joint.UpdateSet(readings)

    thinkplot.PrePlot(2)
    marginals = [joint.Marginal(axis) for axis in (0, 1)]
    for marginal, curve_label in zip(marginals,
                                     ('posterior x', 'posterior y')):
        thinkplot.Pdf(marginal, label=curve_label)
    thinkplot.Show()

    for marginal in marginals:
        print(marginal.Mean(), marginal.Std())
示例#28
0
def main():
    """Compute the Bayes factor of a triangle-prior 'biased' vs a fair Euro.

    The fair hypothesis puts all mass on 50. The biased hypothesis uses a
    triangular prior over 0..100 peaking at 50. The ratio of the two
    average likelihoods of the data (140, 110) is printed as the Bayes
    factor.
    """
    fair = Euro()
    fair.Set(50, 1)

    # Triangular prior: weight rises from 0 to 50, then falls back to 1.
    bias = Euro()
    for x in range(0, 51):
        bias.Set(x, x)
    for x in range(51, 101):
        bias.Set(x, 100-x)
    bias.Normalize()

    thinkplot.Pmf(bias)
    thinkplot.Show()

    # notice that we've changed the representation of the data
    data = 140, 110

    # print() calls replace Python 2 print statements, which are a
    # SyntaxError on Python 3.
    like_bias = AverageLikelihood(bias, data)
    print('like_bias', like_bias)

    like_fair = AverageLikelihood(fair, data)
    print('like_fair', like_fair)

    ratio = like_bias / like_fair
    print('Bayes factor', ratio)
示例#29
0
def main(script, filename='mystery0.dat'):
    """Plot the CDF of a data file in six ways on a 2x3 grid.

    script: unused here (conventionally the script name from argv)
    filename: data file read with ReadFile
    """
    data = ReadFile(filename)
    cdf = thinkstats2.Cdf(data)

    # (subplot number, title, keyword arguments for thinkplot.Cdf)
    before_normal = [
        (2, 'logx', {'xscale': 'log'}),
        (3, 'expo', {'transform': 'exponential'}),
    ]
    after_normal = [
        (5, 'pareto', {'transform': 'pareto'}),
        (6, 'weibull', {'transform': 'weibull'}),
    ]

    def draw_group(group):
        # Whatever thinkplot.Cdf returns is forwarded to Config as
        # keyword options.
        for number, title, cdf_kwargs in group:
            thinkplot.SubPlot(number)
            scale = thinkplot.Cdf(cdf, **cdf_kwargs)
            thinkplot.Config(title=title, **scale)

    thinkplot.PrePlot(rows=2, cols=3)

    thinkplot.SubPlot(1)
    thinkplot.Cdf(cdf)
    thinkplot.Config(title='linear')

    draw_group(before_normal)

    thinkplot.SubPlot(4)
    xs, ys = thinkstats2.NormalProbability(data)
    thinkplot.Plot(xs, ys)
    thinkplot.Config(title='normal')

    draw_group(after_normal)

    thinkplot.Show(legend=False)
示例#30
0
def CH3_2():
    """Locomotive problem (Train).

    One day a locomotive numbered 60 is seen going by; estimate how many
    locomotives there are in total.
    Assume an upper bound N = 1000, 500, 2000; the estimate is sensitive
    to the chosen upper bound.

    With N locomotives in total and number 60 observed:

    1       1/N      0
    2       1/N      0
    ...     ...     ...
    59      1/N      0
    60      1/N     1/60
    61      1/N     1/61
    ...     ...     ...
    1000    1/N     1/1000
    """

    # Hypotheses: the fleet has between 1 and N locomotives, inclusive.
    N = 1000
    # range() excludes its stop value, so N + 1 is needed to include the
    # hypothesis N itself (previously 1000 was silently dropped).
    hypoes = range(1, N + 1)
    suite = Train(hypoes)
    suite.Update(60)
    thinkplot.PrePlot(num=1)
    thinkplot.Pmf(suite)
    thinkplot.Show(title='Train', xlabel='Number of trains', ylabel='Probability')
    print(suite.Mean())