Python Cdfs示例，myplot.Cdfs Python示例

示例#1

0

显示文件

文件： cumulative.py 项目： wuxiang666/thinkstats

def MakeFigures(pool, firsts, others):
    """Write the birth-weight PMF and CDF figures for firsts vs. others.

    Args:
        pool: not used by this function.
        firsts: object exposing `weight_pmf` and `weight_cdf`.
        others: object exposing `weight_pmf` and `weight_cdf`.
    """
    colors = ('blue', 'orange')
    weight_label = 'weight (ounces)'

    # PMFs of birth weight: borderless bars, one color per group.
    myplot.Hists(
        [firsts.weight_pmf, others.weight_pmf],
        root='nsfg_birthwgt_pmf',
        bar_options=[dict(linewidth=0, color=c) for c in colors],
        title='Birth weight PMF',
        xlabel=weight_label,
        ylabel='probability',
    )

    # CDFs of birth weight: same colors, thicker lines, fixed axis limits.
    myplot.Cdfs(
        [firsts.weight_cdf, others.weight_cdf],
        root='nsfg_birthwgt_cdf',
        plot_options=[dict(linewidth=2, color=c) for c in colors],
        title='Birth weight CDF',
        xlabel=weight_label,
        ylabel='probability',
        axis=[0, 200, 0, 1],
    )

示例#2

0

显示文件

    def testGeneratePrevalence(self):
        """Check the posterior over hypotheses after observing sample 'ab'.

        A MetaHypo is built from a prior over 1..3 and updated once per
        observed taxon. The hypotheses with k == 2 and k == 3 must then have
        probability 0.4 and 0.6 respectively.

        With the local `plot` flag switched on, the test also plots two CDFs
        for the k == 3 hypothesis: prevalences of the unseen taxon drawn via
        GeneratePrevalence, and draws from the distribution returned by
        hypo.Get(taxon).
        """
        sample = 'ab'
        prior = Pmf.MakePmfFromList(range(1, 4))
        meta = rarefaction.MetaHypo(prior, sample)

        for taxon in sample:
            meta.Update(taxon)

        k = 3
        taxon = 'other'
        iters = 1000
        plot = False  # set to True to eyeball the generated distribution

        # expected posterior probability, keyed by hypo.k
        expected = {2: 0.4, 3: 0.6}

        hypos = meta.GetHypos()
        for hypo, prob in hypos.Items():
            if hypo.k in expected:
                self.assertAlmostEqual(prob, expected[hypo.k])

            if hypo.k != k or not plot:
                continue

            # check the distribution of generated prevalences
            ps = [hypo.GeneratePrevalence().Prob(taxon) for i in xrange(iters)]
            cdf = Cdf.MakeCdfFromList(ps)

            # compare to what the distribution is supposed to be
            dist = hypo.Get(taxon)
            ps2 = [dist.Random() for i in xrange(iters)]
            cdf2 = Cdf.MakeCdfFromList(ps2)

            myplot.Cdfs([cdf, cdf2], show=True)

示例#3

0

显示文件

def main():
    """Run the simulations for Exercises 5.6 and 5.7.

    Exercise 5.6: for n = 1..5, take 365 samples of get_bread(n) and print
    their mean and standard deviation. Then compare the n = 4 sample with a
    normal sample of the same mean and standard deviation, as CDFs and as
    histograms.

    Exercise 5.7: simulate random man/woman pairs and print the percentage
    of pairs in which the woman is taller.
    """
    # Exercise 5.6
    sample_size = 365
    for n in range(1, 6):
        distribution = [get_bread(n) for _ in range(sample_size)]
        print(np.mean(distribution), np.std(distribution))

    found_distribution = [int(get_bread(4)) for _ in range(sample_size)]
    mu = np.mean(found_distribution)
    sigma = np.std(found_distribution)
    print('picking 4 loaves: mu={} sigma={}'.format(mu, sigma))

    expected_distribution = np.random.normal(mu, sigma, sample_size)
    myplot.Cdfs(cdfs=(Cdf.MakeCdfFromList(found_distribution),
                      Cdf.MakeCdfFromList(expected_distribution)))
    # NOTE(review): confirm this myplot exposes a lowercase show().
    myplot.show()

    # `density=True` replaces the `normed=True` keyword, which newer
    # matplotlib releases no longer accept.
    plt.hist(found_distribution, density=True, bins=20, label='found')
    plt.hist(expected_distribution, density=True, bins=20, alpha=.75,
             label='expected')
    plt.ylabel('Probability')
    plt.xlabel('Bread Weight')
    plt.legend()
    plt.show()

    # Exercise 5.7
    # 59.4 and 52.8 are variances (cm^2) in the exercise statement, while
    # np.random.normal takes a standard deviation as `scale`.
    n_pairs = 99999
    men_heights = np.random.normal(loc=178, scale=np.sqrt(59.4), size=n_pairs)
    women_heights = np.random.normal(loc=163, scale=np.sqrt(52.8),
                                     size=n_pairs)

    # The mean of a boolean array is the fraction of True entries; this
    # avoids integer division in the percentage.
    pct_taller = 100.0 * np.mean(women_heights > men_heights)
    print('In {}% of the pairs the woman will be taller than the man'.format(pct_taller))

示例#4

0

显示文件

def PlotDiffs(groups, low, high, root):
    """Plots the CDF of diffs for each group.

    Args:
        low, high: range of diffs to include
        root: string filename root
    """
    diff_list = []
    for gender, res in groups.iteritems():
        diffs = ComputeDiffs(res, low=low, high=high)
        diff_list.append((gender, diffs))
        print 'PlotDiffs', gender, len(diffs)

    cdfs = []
    for name, diffs in diff_list:
        cdf = Cdf.MakeCdfFromList(diffs, name=name)
        cdfs.append(cdf)

    options = [dict(linewidth=2) for cdf in cdfs]

    myplot.Cdfs(cdfs,
                xlabel='time - qualifying time (min)',
                ylabel='P(difference < x)',
                plot_options=options,
                root=root)

示例#5

0

显示文件

文件： caws.py 项目： rsjudge17/traffic-safety

def plot_accident_cdfs():
    """Plots CDF of accident counts for the control and treatment areas.

    Before and after the date CAWS was deployed (known to be November 1996).
    """
    cdfs = []
    for label in ['control', 'treatment']:
        print label
        filename = label + '_data.csv'
        col_dict = process_merged_file(filename)

        # November 15, 1996
        before, after = split_col_dict(col_dict, 1780)
        print 'before'
        cdf = accident_cdf(before, 'accidents')
        cdf.name = label + ' before'
        cdfs.append(cdf)

        print 'after'
        cdf = accident_cdf(after, 'accidents')
        cdf.name = label + ' after'
        cdfs.append(cdf)

    myplot.Cdfs(cdfs, 
                root='caws.poisson',
                transform='exponential',
                title='CCDF of Accident Counts',
                xlabel='Number of accidents',
                ylabel='Complementary CDF')

示例#6

0

显示文件

def PlotCdfs():
    """Plots distribution of ability for different number of factors.
    
    After 100000 people:
    n   max value
    50  0.333842852938
    10  0.6483317765470
    5   0.837633976492
    1   0.983619459771

    """
    cdfs = []
    for n in [50, 10, 5, 1]:
        pmf, data = WorldRecord(m=10000, n=n)
        cdf = Cdf.MakeCdfFromPmf(pmf, name='n=%d' % n)
        print n, max(cdf.Values())
        cdfs.append(cdf)

    options = dict(linewidth=2)
    plot_options = [options] * len(cdfs)

    myplot.Cdfs(cdfs,
                root='world_record_cdfs',
                plot_options=plot_options,
                title='Distribution of potential',
                xlabel='potential',
                ylabel='CDF')

示例#7

0

显示文件

def Main():
    """Run the updates and plot posterior means split by ground truth.

    Photos whose code appears in the truth data are divided by their truth
    label ('1' versus anything else). The CDFs of the two groups' posterior
    means are shown on one figure.
    """
    truth_map = dict((pcode, label) for pcode, label in ReadTruth())

    labels = ReadLabels()
    photo_map, labeler_map = MakeObjects(labels)

    RunUpdates(photo_map, labeler_map, labels)

    yes, no = [], []
    for pcode, photo in photo_map.iteritems():
        if pcode not in truth_map:
            continue

        mean = photo.Mean()
        bucket = yes if truth_map[pcode] == '1' else no
        bucket.append(mean)

    myplot.Clf()
    myplot.Cdfs([
        thinkbayes.MakeCdfFromList(yes, name='yes'),
        thinkbayes.MakeCdfFromList(no, name='no'),
    ])
    myplot.Show()

    return

    # NOTE(review): everything below is unreachable because of the bare
    # return above; kept as in the original.
    myplot.Clf()
    PlotPosteriorMeans(photo_map, 'photos')
    PlotPosteriorMeans(labeler_map, 'labelers')
    myplot.Show()

示例#8

0

显示文件

def MakeFigure():
    frac1 = 0.8
    frac2 = 1 - frac1

    xs, ys = RenderPdf(1170, 179)
    pmf1 = Pmf.MakePmfFromDict(dict(zip(xs, ys)), name='blue')

    xs, ys = RenderPdf(995, 167)
    pmf2 = Pmf.MakePmfFromDict(dict(zip(xs, ys)), name='green')

    myplot.Pmfs(
        [pmf1, pmf2],
        root='normal1',
        xlabel='CLA score',
        ylabel='PDF',
    )

    pmf1.Normalize(frac1)
    pmf2.Normalize(frac2)

    ymax = max(pmf1.MaxLike(), pmf2.MaxLike())
    ymax = 0.003

    pyplot.clf()

    threshes = [1200, 1300, 1400, 1500, 1570]
    for thresh in threshes:
        myplot.Plot([thresh, thresh], [0, ymax],
                    clf=False,
                    line_options=dict(color='gray', alpha=0.5, linewidth=1))

    plot_options = [
        dict(color='blue', linewidth=2),
        dict(color='green', linewidth=2)
    ]

    myplot.Pmfs(
        [pmf1, pmf2],
        plot_options=plot_options,
        clf=False,
        root='normal2',
        xlabel='CLA score',
        ylabel='PDF',
    )

    cdf1 = Cdf.MakeCdfFromPmf(pmf1)
    cdf2 = Cdf.MakeCdfFromPmf(pmf2)

    for thresh in threshes:
        p1 = frac1 * (1 - cdf1.Prob(thresh))
        p2 = frac2 * (1 - cdf2.Prob(thresh))

        den = p1 + p2
        rep1 = p1 / den
        rep2 = p2 / den
        print thresh, den, rep1, rep2

    return

    myplot.Cdfs([cdf1, cdf2], root='normal2', xlabel='', ylabel='', title='')

示例#9

0

显示文件

文件： cumulative.py 项目： wuxiang666/thinkstats

def Resample(cdf, n=10000):
    """Plot a CDF next to the CDF of a sample drawn from it.

    Args:
        cdf: source distribution; must provide Sample(n).
        n: number of values to draw.
    """
    resampled = Cdf.MakeCdfFromList(cdf.Sample(n), 'resampled')
    myplot.Cdfs(
        [cdf, resampled],
        root='resample_cdf',
        title='CDF',
        xlabel='weight in oz',
        ylabel='CDF(x)',
    )

示例#10

0

显示文件

def main():
	all_recs = cyb_records.Stats()
	all_recs.ReadRecords()
	print 'Number of total stats', len(all_recs.records)

	cdf = CdfPerDay(all_recs.records)
	myplot.Cdfs(cdf)
	myplot.Show(title="CDF: daily usage of machines at the YMCA", xlabel = 'Distance (in m / day)', ylabel = 'Percentile')

示例#11

0

显示文件

def MakeFigures(pool, firsts, others):
    """Save the age/weight CDF figures and the age-vs-weight hexbin plot.

    Args:
        pool: object exposing `age_cdf` and `weight_cdf`; also passed to
            GetAgeWeight for the scatter data.
        firsts, others: objects exposing `age_cdf` and `weight_cdf`.
    """
    age_title = "Distribution of mother's age"
    weight_title = "Distribution of birth weight"
    age_label = 'age (years)'
    weight_label = 'birth weight (oz)'

    # Pooled CDF of mother's age (single curve, no legend).
    myplot.Clf()
    myplot.Cdf(pool.age_cdf)
    myplot.Save(root='agemodel_age_cdf', title=age_title,
                xlabel=age_label, ylabel='CDF', legend=False)

    # Pooled CDF of birth weight (single curve, no legend).
    myplot.Clf()
    myplot.Cdf(pool.weight_cdf)
    myplot.Save(root='agemodel_weight_cdf', title=weight_title,
                xlabel=weight_label, ylabel='CDF', legend=False)

    # Mother's age: first babies vs. others.
    myplot.Clf()
    myplot.Cdfs([firsts.age_cdf, others.age_cdf])
    myplot.Save(root='agemodel_age_cdfs', title=age_title,
                xlabel=age_label, ylabel='CDF')

    # Birth weight: first babies vs. others.
    myplot.Clf()
    myplot.Cdfs([firsts.weight_cdf, others.weight_cdf])
    myplot.Save(root='agemodel_weight_cdfs', title=weight_title,
                xlabel=weight_label, ylabel='CDF')

    # Age against weight, drawn as a gray hexbin density.
    ages, weights = GetAgeWeight(pool)
    pyplot.clf()
    pyplot.hexbin(ages, weights, cmap=matplotlib.cm.gray_r)
    myplot.Save(root='agemodel_scatter',
                xlabel='Age (years)',
                ylabel='Birth weight (oz)',
                legend=False)

示例#12

0

显示文件

def MakeFigure():
    fp = open('babyboom.dat')
    
    # skip to the beginning of the data
    for line in fp:
        if line.find('START DATA') != -1:
            break
    
    # read a list of times
    times = []
    for line in fp:
        t = line.split()
        time = int(t[-1])
        times.append(time)
    
    # compute interarrival times
    diffs = [times[0]]
    for i in range(len(times)-1):
        diff = times[i+1] - times[i]
        diffs.append(diff)
    
    n = len(diffs)
    mu = thinkstats.Mean(diffs)
        
    print 'mean interarrival time', mu
    
    cdf = Cdf.MakeCdfFromList(diffs, 'actual')

    sample = [random.expovariate(1/mu) for i in range(n)]
    model = Cdf.MakeCdfFromList(sample, 'model')
    
    myplot.Cdf(cdf)
    myplot.Save(root='interarrivals',
              title='Time between births',
              xlabel='minutes',
              ylabel='CDF',
              legend=False,
                formats=['eps', 'png', 'pdf'])

    myplot.Cdfs([cdf, model], complement=True)
    myplot.Save(root='interarrivals_model',
                title='Time between births',
                xlabel='minutes',
                ylabel='Complementary CDF',
                yscale='log',
                formats=['eps', 'png', 'pdf'])

    pyplot.subplots_adjust(bottom=0.11)
    myplot.Cdf(cdf, complement=True)
    myplot.Save(root='interarrivals_logy',
                title='Time between births',
                xlabel='minutes',
                ylabel='Complementary CDF',
                yscale='log',
                legend=False,
                formats=['eps', 'png', 'pdf'])

示例#13

0

显示文件

def main():
    all_recs = cyb_records.Stats()
    all_recs.ReadRecords()
    print 'Number of total stats', len(all_recs.records)

    cdf = CdfPerMachine(all_recs.records)
    myplot.Cdfs(cdf)
    myplot.Show(title="CDF of cardio machine average distances",
                xlabel='Average Distances',
                ylabel='Probability')

示例#14

0

显示文件

def main():
    """Compare the baby-weight CDF with CDFs of samples of 100 and 1000.

    Also prints ten values drawn from the original CDF via Sample.
    """
    firsts, others, babies = Babies.PartitionBabies()
    cdf0 = Cdf.MakeCdfFromList(Babies.GetWightList(babies), name='cdf0')
    print("Sample(cdf, 10) : ", Sample(cdf0, 10))

    # Resample the original CDF at two sizes and rebuild a CDF from each.
    cdf1 = Cdf.MakeCdfFromList(WeightRandomSample(cdf0, 100), name='cdf1')
    cdf2 = Cdf.MakeCdfFromList(WeightRandomSample(cdf0, 1000), name='cdf2')

    curves = [cdf0, cdf1, cdf2]
    myplot.Cdfs(curves, complement=False, transform=None)
    myplot.Show()

示例#15

0

显示文件

文件： pareto_world.py 项目： wuxiang666/thinkstats

def MakeFigure(xmin=100, alpha=1.7, mu=150, sigma=25):
    """Save a figure comparing a Pareto sample's CDF with a normal sample's.

    Args:
        xmin: scale factor applied to each Pareto variate.
        alpha: shape parameter passed to random.paretovariate.
        mu, sigma: parameters passed to random.normalvariate.
    """
    size = 10000

    pareto_values = [xmin * random.paretovariate(alpha) for _ in range(size)]
    pareto_cdf = Cdf.MakeCdfFromList(pareto_values, name='pareto')

    normal_values = [random.normalvariate(mu, sigma) for _ in range(size)]
    normal_cdf = Cdf.MakeCdfFromList(normal_values, name='normal')

    myplot.Cdfs(
        [pareto_cdf, normal_cdf],
        root='pareto_world2',
        title='Pareto World',
        xlabel='height (cm)',
        ylabel='CDF',
    )

示例#16

0

显示文件

文件： 5-7.py 项目： qrsforever/workspace

def main():
    """Summarize and plot BRFSS heights for the two groups in the summary.

    Prints the trimmed mean, variance, standard deviation and coefficient of
    variation for entries 1 ("man") and 2 ("lady") of the height summary.
    It then shows each group's histogram and PMF in turn, and finally both
    CDFs on one figure.
    """
    resp = brfss.Respondents()
    resp.ReadRecords(data_dir='res')
    heights = resp.SummarizeHeight()

    man_d = heights[1]
    lady_d = heights[2]

    # Men: mu, var, sigma and coefficient of variation (CV)
    man_mu, man_var = thinkstats.TrimmedMeanVar(man_d)
    man_sigma = math.sqrt(man_var)
    man_cv = man_sigma / man_mu
    print("man: mu = %.3f, var = %.3f, sigma = %.3f, cv = %.3f" % (man_mu, man_var, man_sigma, man_cv))

    # Women: mu, var, sigma and coefficient of variation (CV)
    lady_mu, lady_var = thinkstats.TrimmedMeanVar(lady_d)
    lady_sigma = math.sqrt(lady_var)
    lady_cv = lady_sigma / lady_mu
    print("lady: mu = %.3f, var = %.3f, sigma = %.3f, cv = %.3f" % (lady_mu, lady_var, lady_sigma, lady_cv))

    def _show_then_clear():
        # Display the current figure, then reset it for the next plot.
        myplot.Show()
        myplot.Clf()

    # Histograms, men then women
    man_hist = Pmf.MakeHistFromList(man_d, name='man hist')
    myplot.Hist(man_hist)
    _show_then_clear()

    lady_hist = Pmf.MakeHistFromList(lady_d, name='lady hist')
    myplot.Hist(lady_hist)
    _show_then_clear()

    # PMFs, men then women
    man_pmf = Pmf.MakePmfFromHist(man_hist, name='man pmf')
    myplot.Pmf(man_pmf)
    _show_then_clear()

    lady_pmf = Pmf.MakePmfFromHist(lady_hist, name='lady pmf')
    myplot.Pmf(lady_pmf)
    _show_then_clear()

    # Cumulative distributions of both groups on one figure
    man_cdf = Cdf.MakeCdfFromPmf(man_pmf, name='man cdf')
    lady_cdf = Cdf.MakeCdfFromPmf(lady_pmf, name='lady cdf')
    myplot.Cdfs((man_cdf, lady_cdf), complement=False, transform=None)
    myplot.Show()

示例#17

0

显示文件

def main():
    """Print the percentile rank of `mywt` in three groups and plot their CDFs.

    `mywt` is not defined in this function; presumably it is a module-level
    weight value (confirm against the rest of the file).
    """
    firsts, others, babies = Babies.PartitionBabies()

    def _weight_cdf(group, name):
        # CDF of the weights of one group of babies.
        return Cdf.MakeCdfFromList(Babies.GetWightList(group), name=name)

    cdf_babies = _weight_cdf(babies, 'babies')
    cdf_firsts = _weight_cdf(firsts, 'firsts')
    cdf_others = _weight_cdf(others, 'others')

    # Prob() yields a fraction; scale to a percentile rank.
    print("babies percentile rank: ", 100 * cdf_babies.Prob(mywt))
    print("firsts percentile rank: ", 100 * cdf_firsts.Prob(mywt))
    print("others percentile rank: ", 100 * cdf_others.Prob(mywt))

    myplot.Cdfs([cdf_babies, cdf_firsts, cdf_others])
    myplot.Show()

示例#18

0

显示文件

def MakeFigures(pmf, biased_pmf):
    """Makes figures showing the CDF of the biased and unbiased PMFs"""
    cdf = Cdf.MakeCdfFromPmf(pmf, 'unbiased')
    print 'unbiased median', cdf.Percentile(50)
    print 'percent < 100', cdf.Prob(100)
    print 'percent < 1000', cdf.Prob(1000)

    biased_cdf = Cdf.MakeCdfFromPmf(biased_pmf, 'biased')
    print 'biased median', biased_cdf.Percentile(50)

    myplot.Cdfs([cdf, biased_cdf],
                root='slashdot.logx',
                xlabel='Number of friends/foes',
                ylabel='CDF',
                xscale='log')

示例#19

0

显示文件

文件： bayes_height.py 项目： sjl421/Example-Codes-and-Learning-Tools

def PlotCdfs(samples):
    """Save a figure with one CDF per sample, restricted to values below 150.

    Args:
        samples: map from label to a sequence of values.
    """
    # Only the low outliers (x < 150) of each sample go into its CDF.
    cdfs = [
        Cdf.MakeCdfFromList([value for value in values if value < 150], label)
        for label, values in samples.iteritems()
    ]

    myplot.Clf()
    myplot.Cdfs(cdfs)
    myplot.Save(
        root='bayes_height_cdfs',
        title='CDF of height',
        xlabel='Reported height (cm)',
        ylabel='CDF',
    )

示例#20

0

显示文件

def main():
    """Show the PMF and the CDF of running speeds.

    The CDF is displayed twice: once through myplot.Cdf (single curve) and
    once through myplot.Cdfs (the multi-curve entry point).
    """
    # Local import kept from the original, moved to the top of the function.
    import Cdf

    results = ReadResults()
    speeds = GetSpeeds(results)

    pmf = Pmf.MakePmfFromList(speeds, 'speeds')
    myplot.Pmf(pmf)
    myplot.Show(title='PMF of running speed',
                xlabel='speed (mph)',
                ylabel='probability')

    cdf = Cdf.MakeCdfFromList(speeds, 'speeds')
    myplot.Cdf(cdf)
    myplot.Show()

    # Cdfs is given a sequence of CDFs by every other caller, so wrap the
    # single curve in a list instead of passing the bare object.
    myplot.Cdfs([cdf])
    myplot.Show()

示例#21

0

显示文件

文件： birthdays.py 项目： wuxiang666/thinkstats

def Main(script):
    """Plot the complementary CDF of the gaps between sorted birthdays.

    Args:
        script: not used by this function.
    """
    # read 'em and sort 'em
    birthdays = ReadBirthdays()
    birthdays.sort()

    # intervals between consecutive birthdays, in whole days
    days = []
    for gap in Diff(birthdays):
        days.append(gap.days)

    # make and plot the CCDF on a log scale.
    intervals_cdf = Cdf.MakeCdfFromList(days, name='intervals')
    myplot.Cdfs(
        [intervals_cdf],
        'intervals',
        xlabel='days',
        ylabel='ccdf',
        yscale='log',
        complement=True,
    )

示例#22

0

显示文件

    def PlotPrevalence(self, root=None, clf=False, n=6):
        """Looks up the PMFs for a given taxon and plots them."""
        if root: clf = True

        cdfs = []
        for taxon in lowercase[:n]:
            pmf = self.GetPrevalence(taxon)
            cdf = Cdf.MakeCdfFromPmf(pmf)
            cdfs.append(cdf)

            median = cdf.Percentile(50)
            ci = cdf.Percentile(5), cdf.Percentile(95)
            print taxon, median, ci

        myplot.Cdfs(cdfs,
                    root=root,
                    clf=clf,
                    xlabel='prevalence',
                    ylabel='prob')

示例#23

0

显示文件

文件： agemodel.py 项目： wuxiang666/thinkstats

def MakeFigures(pool, firsts, others):
    """Save the mother's-age CDF figure and the age-vs-weight hexbin plot.

    Args:
        pool: passed to GetAgeWeight to obtain the scatter data.
        firsts, others: objects exposing `age_cdf`.
    """
    # CDFs of mother's age for first babies and others, drawn with thin
    # lines (one options dict per curve).
    thin_lines = [dict(linewidth=0.5) for _ in range(2)]

    myplot.Cdfs(
        [firsts.age_cdf, others.age_cdf],
        root='nsfg_age_cdf',
        line_options=thin_lines,
        title="Mother's age CDF",
        xlabel='age (years)',
        ylabel='probability',
    )

    # Age against weight, drawn as a gray hexbin density.
    ages, weights = GetAgeWeight(pool)
    pyplot.clf()
    pyplot.hexbin(ages, weights, cmap=matplotlib.cm.gray_r)
    myplot.Save(
        root='age_scatter',
        xlabel='Age (years)',
        ylabel='Birth weight (oz)',
        legend=False,
    )

示例#24

0

显示文件

def MakeFigure(xmin=100, alpha=1.7, mu=150, sigma=25):
    """Save the ParetoWorld height figure: Pareto CDF against a normal CDF.

    Both curves are built from 10000 random draws.

    xmin: parameter of the Pareto distribution
    alpha: parameter of the Pareto distribution
    mu: parameter of the Normal distribution
    sigma: parameter of the Normal distribution
    """
    size = 10000

    pareto_values = [xmin * random.paretovariate(alpha) for _ in range(size)]
    pareto_cdf = Cdf.MakeCdfFromList(pareto_values, name='pareto')

    normal_values = [random.normalvariate(mu, sigma) for _ in range(size)]
    normal_cdf = Cdf.MakeCdfFromList(normal_values, name='normal')

    myplot.Clf()
    myplot.Cdfs([pareto_cdf, normal_cdf])
    myplot.Save(
        root='pareto_world2',
        title='Pareto World',
        xlabel='height (cm)',
        ylabel='CDF',
    )

示例#25

0

显示文件

def PlotDiffs(half_diffs, diffs):
    """Save CDF and PMF figures comparing the 'half' and 'full' diffs.

    The CDFs use the values as given; the PMFs use the values truncated to
    integers.

    Args:
        half_diffs: sequence of diffs plotted under the name 'half'.
        diffs: sequence of diffs plotted under the name 'full'.
    """
    time_label = 'time - qualifying time (min)'

    # Both curves share one options dict object.
    style = dict(linewidth=2)
    styles = [style, style]

    half_cdf = Cdf.MakeCdfFromList(half_diffs, 'half')
    full_cdf = Cdf.MakeCdfFromList(diffs, 'full')

    myplot.Cdfs(
        [half_cdf, full_cdf],
        xlabel=time_label,
        ylabel='CDF',
        plot_options=styles,
        root='marathon_cdf',
    )

    # Truncate to whole minutes before building the PMFs.
    whole_full = [int(value) for value in diffs]
    whole_half = [int(value) for value in half_diffs]

    full_pmf = Pmf.MakePmfFromList(whole_full, 'full')
    half_pmf = Pmf.MakePmfFromList(whole_half, 'half')

    myplot.Pmfs(
        [half_pmf, full_pmf],
        xlabel=time_label,
        ylabel='PMF',
        plot_options=styles,
        root='marathon_pmf',
    )

示例#26

0

显示文件

文件： 4-8.py 项目： qrsforever/workspace

#!/usr/bin/python3
# -*- coding: utf-8 -*-

import math
import Babies
import Cdf
import myplot
import thinkstats
import erf

if __name__ == "__main__":
    # Compare the empirical CDF of pregnancy lengths with a normal model
    # that uses the sample mean and standard deviation.
    firsts, others, babies = Babies.PartitionBabies()
    preglengths = Babies.GetPregnacyList(babies)
    mu = thinkstats.Mean(preglengths)
    sigma = math.sqrt(thinkstats.Var(preglengths, mu))
    print("mu = %.3f sigma = %.3f" % (mu, sigma))

    # Empirical CDF of the observed lengths.
    cdf0 = Cdf.MakeCdfFromList(preglengths, name='cdf0')

    # Model CDF. The x values must be distinct and ascending so that the
    # matching probabilities are non-decreasing; the raw observation list is
    # neither sorted nor unique.
    # NOTE(review): assumes Cdf.Cdf(xs, ps, name) expects sorted xs; confirm
    # against the Cdf module.
    xs = sorted(set(preglengths))
    ys = [erf.NormalCdf(x, mu=mu, sigma=sigma) for x in xs]
    cdf1 = Cdf.Cdf(xs, ys, 'cdf1')

    myplot.Cdf(cdf1, complement=False, transform=None)
    myplot.Cdfs([cdf0, cdf1], complement=False, transform=None)
    myplot.Show()

示例#27

0

显示文件

License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""

import math
import random
import matplotlib.pyplot as pyplot
import Cdf
import myplot
import Pmf

# Simulate 10 groups of 30 random birthdays. For each group, build the CDF of
# the gaps between consecutive sorted birthdays; then add one CDF over the
# gaps of all groups together and plot everything on one figure.
cdfs = []
allbday = []

for _ in range(10):
    n = 30
    t = sorted(random.randrange(365) for _ in range(n))

    pmf = Pmf.Pmf()
    for earlier, later in zip(t, t[1:]):
        # gap in days between neighbouring birthdays
        x = later - earlier
        pmf.Incr(x)
        allbday.append(x)

    cdfs.append(Cdf.MakeCdfFromPmf(pmf))

# pooled gaps across all groups
cdf = Cdf.MakeCdfFromList(allbday, 'all')
cdfs.append(cdf)
myplot.Cdfs(cdfs, root='birthday', transform='exponential')

示例#28

0

显示文件

文件： ex3_9.py 项目： ewall/Think-Stats

# Example 3-9

import survey, Cdf, myplot

def Sample(cdf, n):
    """Draw n random values from cdf.

    Args:
        cdf: object with a zero-argument Random() method.
        n: number of values to draw.

    Returns:
        A list with the result of each cdf.Random() call, in call order.
    """
    draws = []
    for _ in range(n):
        draws.append(cdf.Random())
    return draws

# Load the pregnancy records and keep those with outcome == 1 as births.
table = survey.Pregnancies()
table.ReadRecords()

births = []
for rec in table.records:
    if rec.outcome == 1:
        births.append(rec)

# Keep pound weights below 97 (presumably larger values are sentinel codes;
# confirm against the survey codebook).
weights = []
for birth in births:
    if birth.birthwgt_lb < 97:
        weights.append(birth.birthwgt_lb)
weights_cdf = Cdf.MakeCdfFromList(weights, 'birth weights')

# Resample 10000 values from the CDF and build a CDF of the sample.
sample = Sample(weights_cdf, 10000)
sample_cdf = Cdf.MakeCdfFromList(sample, 'sample weights')

# Overlay the original and resampled CDFs.
myplot.Clf()
myplot.Cdfs((weights_cdf, sample_cdf))
myplot.Show(
    title='CDF of all birth weights',
    xlabel='weight (lbs)',
    ylabel='cumulative probability',
)