示例#1
0
def MakeFigures(firsts, others):
    """Plot Hists and Pmfs for the pregnancy length.

    Two figures are written through myplot.Save: 'nsfg_hist', drawn from
    the `hist` attributes of the arguments, and 'nsfg_pmf', drawn from
    their `pmf` attributes.

    Args:
        firsts: object with `hist` and `pmf` attributes
        others: object with `hist` and `pmf` attributes
    """
    # make the histogram
    Hists([firsts.hist, others.hist])
    myplot.Save(root='nsfg_hist',
                title='Histogram',
                xlabel='weeks',
                ylabel='frequency',
                axis=[23, 46, 0, 2700])

    # make the PMF (same x range, y range scaled for probabilities)
    Hists([firsts.pmf, others.pmf])
    myplot.Save(root='nsfg_pmf',
                title='PMF',
                xlabel='weeks',
                ylabel='probability',
                axis=[23, 46, 0, 0.6])
def NormalProbPlot(samples):
    """Makes a normal probability plot for each sample in samples.

    Args:
        samples: map from label to sample; every label must be one of the
            keys of the marker table below ('male' or 'female'), otherwise
            the lookup raises KeyError
    """
    pyplot.clf()

    markers = dict(male='b', female='g')

    # items() works on both Python 2 and Python 3 dicts; iteritems() is
    # Python 2 only.
    for label, sample in samples.items():
        NormalPlot(sample, label, markers[label], jitter=0.0)

    myplot.Save(
        show=True,
        #root='bayes_height_normal',
        title='Normal probability plot',
        xlabel='Standard normal',
        ylabel='Reported height (cm)')
def PlotCdfs(samples):
    """Make CDFs showing the distribution of outliers.

    For each sample only the values below 150 are kept; one CDF per label
    is built from them and all CDFs are drawn on a single figure saved
    under the root 'bayes_height_cdfs'.

    Args:
        samples: map from label to a sequence of values
    """
    # items() works on both Python 2 and Python 3 dicts; iteritems() is
    # Python 2 only.
    cdfs = [
        Cdf.MakeCdfFromList([x for x in sample if x < 150], label)
        for label, sample in samples.items()
    ]

    myplot.Clf()
    myplot.Cdfs(cdfs)
    myplot.Save(root='bayes_height_cdfs',
                title='CDF of height',
                xlabel='Reported height (cm)',
                ylabel='CDF')
示例#4
0
def PlotDiffs(filename='heri.0', root='heri1', flag=False):
    """Plots the (x, y) pairs read from a data file.

    filename: name handed to ReadData
    root: string prefix of the output files
    flag: when true, RunFit is called on the data before plotting
    """
    pyplot.clf()

    # ReadData yields pairs; transpose them into parallel sequences
    xs, ys = zip(*ReadData(filename))

    if flag:
        RunFit(xs, ys)

    pyplot.plot(xs, ys, 'b.:', markersize=15)

    save_options = dict(
        title='Yearly changes',
        xlabel='',
        ylabel='percentage points',
        axis=[1972, 2013, -1.2, 2.1],
    )
    myplot.Save(root=root, **save_options)
def MakeFigures():
    """Plots the CDF of the populations read by populations.ReadData.

    Three figures are saved: the CDF on linear axes ('populations'), the
    CDF with a log x axis ('populations_logx') and the complementary CDF
    on log-log axes ('populations_loglog').  Finally the sorted logarithms
    of the populations are handed to rankit.MakeNormalPlot.
    """
    pops = populations.ReadData()
    # parenthesised single argument prints identically on Python 2 and 3
    print(len(pops))

    cdf = Cdf.MakeCdfFromList(pops, 'populations')

    # One entry per figure:
    # (keyword options for myplot.Cdf, keyword options for myplot.Save)
    figures = [
        (dict(),
         dict(root='populations', ylabel='CDF')),
        (dict(),
         dict(root='populations_logx', ylabel='CDF', xscale='log')),
        (dict(complement=True),
         dict(root='populations_loglog', ylabel='Complementary CDF',
              yscale='log', xscale='log')),
    ]

    for cdf_options, save_options in figures:
        myplot.Clf()
        myplot.Cdf(cdf, **cdf_options)
        myplot.Save(title='City/Town Populations',
                    xlabel='population',
                    legend=False,
                    **save_options)

    t = sorted(math.log(x) for x in pops)
    rankit.MakeNormalPlot(t, 'populations_rankit')
示例#6
0
def PlotProbs(filename='p.heri.31'):
    """Plots the (x, y) pairs read from a file over four grey vertical bars.

    filename: name handed to ReadData

    The figure is saved under the root 'heri2'.
    """
    pyplot.clf()

    # wide light-grey vertical bars drawn at fixed x positions
    for x in [1975.5, 1984.5, 1998.5, 2006.5]:
        pyplot.plot([x, x], [0, 1], color='0.8', linewidth=10)

    data = ReadData(filename)
    xs, ys = zip(*data)

    # The colour is given only through the keyword: specifying it in the
    # format string as well ('bo-') is redundant and makes matplotlib warn.
    pyplot.plot(xs, ys, 'o-', color='blue', linewidth=2, markersize=6)
    myplot.Save(root='heri2',
                title='Location of changepoints',
                xlabel='',
                ylabel='cumulative probability',
                axis=[1972, 2010, 0, 1])
示例#7
0
def PlotOneSimulation(xscale='linear'):
    """Plots one run of PlotSimulation(100000) and saves the figure.

    xscale: 'linear' fixes the axis limits and x ticks; any other value
        is passed to pyplot.xscale and the bottom margin is enlarged

    The output root is 'world_record_sim_' followed by xscale.
    """
    pyplot.clf()
    PlotSimulation(100000)

    if xscale != 'linear':
        pyplot.subplots_adjust(bottom=0.15)
        pyplot.xscale(xscale)
    else:
        pyplot.axis([-0.1, 1.1, 0.0, 0.7])
        pyplot.xticks([0.0, 0.2, 0.4, 0.6, 0.8, 1.0])

    root = 'world_record_sim_%s' % xscale
    myplot.Save(root=root,
                title='Simulated world record progression',
                xlabel='Fraction of population tested',
                ylabel='Max potential seen')
示例#8
0
def main():
    """Plots the biased PMF of the relay speeds and saves it as a PNG.

    The speeds come from relay.ReadResults / relay.GetSpeeds; their PMF is
    passed through BiasPmf with 7 as first argument before plotting.
    """
    speeds = relay.GetSpeeds(relay.ReadResults())
    observed = BiasPmf(7, Pmf.MakePmfFromList(speeds, 'speeds'))

    myplot.Hist(observed)

    save_options = dict(
        formats=['png'],
        root='runner',
        title='PMF of observed speed',
        xlabel='speed (mph)',
        ylabel='probability',
    )
    myplot.Save(**save_options)
def CheckCdf():
    """Compare chi2 values from simulation with chi2 distributions.

    Curves from Chi2Cdf are drawn for 1, 2 and 3 degrees of freedom, then
    the CDF of 1000 calls to SimulateChi2 is drawn on the same figure,
    which is saved as a PNG under the root 'khan3'.
    """
    for df in (1, 2, 3):
        xs, ys = Chi2Cdf(df=df, high=15)
        pyplot.plot(xs, ys, label=df)

    simulated = [SimulateChi2() for _ in range(1000)]
    myplot.Cdf(Cdf.MakeCdfFromList(simulated))

    myplot.Save(root='khan3',
                xlabel='chi2 value',
                ylabel="CDF",
                formats=['png'])
示例#10
0
def MakeNormalPlot(ys, root=None, line_options=None, **options):
    """Makes a normal probability plot.

    As many standard normal variates as there are values in ys are drawn,
    and the sorted variates are plotted against the sorted ys.

    Args:
        ys: sequence of values
        root: passed as first argument to myplot.Save
        line_options: dictionary of options for pyplot.plot, or None for
            no extra options
        options: dictionary of options for myplot.Save
    """
    # A None default avoids sharing one mutable dict between all calls.
    if line_options is None:
        line_options = {}

    # TODO: when n is small, generate a larger sample and desample
    n = len(ys)
    xs = [random.normalvariate(0.0, 1.0) for _ in range(n)]

    pyplot.clf()
    pyplot.plot(sorted(xs), sorted(ys), 'b.', markersize=3, **line_options)

    myplot.Save(root, xlabel='Standard normal values', legend=False, **options)
示例#11
0
def MakeFigures(pmf, biased_pmf):
    """Plots the CDFs of the unbiased and biased PMFs on a log x axis.

    Also prints the median of each CDF and the value of the unbiased CDF
    at 100 and at 1000.  The figure is saved under 'slashdot.logx'.

    pmf: the unbiased PMF
    biased_pmf: the biased PMF
    """
    unbiased = Cdf.MakeCdfFromPmf(pmf, 'unbiased')
    print('unbiased median', unbiased.Percentile(50))
    for limit in (100, 1000):
        print('percent < %d' % limit, unbiased.Prob(limit))

    biased = Cdf.MakeCdfFromPmf(biased_pmf, 'biased')
    print('biased median', biased.Percentile(50))

    myplot.Clf()
    myplot.Cdfs([unbiased, biased])

    save_options = dict(
        xlabel='Number of friends/foes',
        ylabel='CDF',
        xscale='log',
    )
    myplot.Save(root='slashdot.logx', **save_options)
示例#12
0
def main():
    """Plots, for weeks 35 through 45, Prob(week) of two distributions.

    For each week BornAtButNotBefore(week) returns a pair of objects; the
    probability each assigns to that week is collected and the two series
    are plotted and saved under the root 'first_conditional_pmf'.
    """
    weeks = range(35, 46)
    pyplot.clf()

    first_probs = []
    other_probs = []
    for week in weeks:
        first_dist, other_dist = BornAtButNotBefore(week)
        first_probs.append(first_dist.Prob(week))
        other_probs.append(other_dist.Prob(week))

    series = [
        (first_probs, "First babies"),
        (other_probs, "Others babies"),
    ]
    for probs, label in series:
        pyplot.plot(weeks, probs, label=label)

    myplot.Save(root='first_conditional_pmf',
                title='My conditional',
                xlabel='weeks',
                ylabel='probability')
示例#13
0
def Main(script):
    """Plots the CDF of the intervals, in days, between sorted birthdays.

    script: not used by this function

    The CDF is drawn with transform='exponential'; whatever myplot.Cdf
    returns is forwarded as keyword options to myplot.Save.
    """
    # read the birthdays and put them in order
    birthdays = ReadBirthdays()
    birthdays.sort()

    # lengths of the gaps between consecutive birthdays, in days
    days = [gap.days for gap in Diff(birthdays)]

    # plot the CDF with the exponential transform and save it
    cdf = Cdf.MakeCdfFromList(days, name='intervals')
    scale = myplot.Cdf(cdf, transform='exponential')

    save_options = dict(root='intervals', xlabel='days', ylabel='ccdf')
    save_options.update(scale)
    myplot.Save(**save_options)
示例#14
0
def MakePercentiles(shelf, n=50):
    """Plots percentiles of the second values against means of the first.

    The pairs returned by ReadShelf are sorted and cut into consecutive
    groups of n.  For every group the 5th, 25th, 50th, 75th and 95th
    percentiles of the second elements are plotted against the mean of
    the first elements.  The figure is saved under 'race_predictor4' and
    shown.

    shelf: argument handed to ReadShelf
    n: number of pairs per group
    """
    pairs = ReadShelf(shelf)
    pairs.sort()
    for x, y in pairs:
        print('%s %s' % (x, y))
    # NOTE: a stray debugging `return` used to sit here and made all of the
    # code below unreachable; it has been removed so the plot is produced.

    xs = []
    plists = []

    for i in range(0, len(pairs), n):
        subset = pairs[i:i + n]
        print('%s %s' % (i, len(subset)))
        halfs, fulls = zip(*subset)
        cdf = Cdf.MakeCdfFromList(fulls)
        ys = [cdf.Percentile(p) for p in [5, 25, 50, 75, 95]]
        x = thinkstats.Mean(halfs)

        print('%s %s' % (x, ys))

        xs.append(x)
        plists.append(ys)

    # drop the last point (presumably because the final group can hold
    # fewer than n pairs -- TODO confirm); nothing to drop without data
    if xs:
        xs.pop()
        plists.pop()

    # transpose: one sequence of y values per percentile
    ylists = zip(*plists)

    plot_options = [
        dict(color='red', label='5%ile', linestyle='dotted'),
        dict(color='orange', label='25%ile', linestyle='dashed'),
        dict(color='yellow', label='50%ile', linestyle='solid'),
        dict(color='green', label='75%ile', linestyle='dashed'),
        dict(color='cyan', label='95%ile', linestyle='dotted'),
    ]

    # vertical reference line at x = 94
    pyplot.plot([94, 94], [100, 350])

    for ys, d in zip(ylists, plot_options):
        pyplot.plot(xs, ys, linewidth=3, **d)

    myplot.Save(root='race_predictor4',
                xlabel='Half marathon (min)',
                ylabel='Marathon (min)',
                show=True)
示例#15
0
def MakeDiffFigure(firsts, others):
    """Plot the difference between the PMFs.

    For each week from 35 through 45 the bar height is 100 times the
    probability under firsts.pmf minus the probability under others.pmf.
    The figure is saved under the root 'nsfg_diffs'.
    """
    weeks = range(35, 46)
    diffs = [
        100 * (firsts.pmf.Prob(week) - others.pmf.Prob(week))
        for week in weeks
    ]

    pyplot.clf()
    pyplot.bar(weeks, diffs, align='center')

    save_options = dict(
        title='Difference in PMFs',
        xlabel='weeks',
        ylabel='100 (PMF$_{first}$ - PMF$_{other}$)',
        legend=False,
    )
    myplot.Save(root='nsfg_diffs', **save_options)
示例#16
0
    def plot_data(self, root='caws.accident'):
        """Plots a time series of monthly accidents.

        One line is drawn per entry of self.subsets, using the histogram
        returned by self.count_accidents; the total of each histogram is
        printed along the way.

        root: string prefix of the output files.
        """
        pyplot.clf()
        # items() works on both Python 2 and Python 3 dicts; iteritems()
        # is Python 2 only.
        for name, av_dict in self.subsets.items():
            hist = self.count_accidents(av_dict)

            # %-formatting prints the same text on Python 2 and 3
            print('%s Total accidents %s' % (name, hist.Total()))

            # sort the histogram items, then split them into the two
            # parallel sequences that pyplot.plot expects
            years, counts = zip(*sorted(hist.Items()))
            pyplot.plot(years, counts, label=name)

        myplot.Save(root=root,
                    title='Monthly Accident Counts',
                    xlabel='Year',
                    ylabel='Number of accidents',
                    axis=[1991.5, 2002.5, 0, 40])
示例#17
0
def MakeParetoCdf():
    """Generates a plot of the CDF of height in Pareto World.

    ParetoCdf is evaluated at 50 evenly spaced points from 0 up to (but
    not including) 1000, with alpha = 1.7 and xmin = 100.  The value of
    ParetoMedian is printed and the figure is saved as 'pareto_world1'.
    """
    n = 50
    upper = 1000.0  # renamed from `max` to stop shadowing the builtin
    xs = [upper * i / n for i in range(n)]

    xmin = 100
    alpha = 1.7
    ps = [ParetoCdf(x, alpha, xmin) for x in xs]
    # %-formatting prints the same text on Python 2 and 3
    print('Median %s' % (ParetoMedian(xmin, alpha),))

    pyplot.clf()
    pyplot.plot(xs, ps, linewidth=2)
    myplot.Save('pareto_world1',
                title='Pareto CDF',
                xlabel='height (cm)',
                ylabel='CDF',
                legend=False)
示例#18
0
def MakeParetoCdf():
    """Generates a plot of the Pareto CDF.

    ParetoCdf is evaluated at 50 evenly spaced points from 0 up to (but
    not including) 10, with alpha = 1.0 and xmin = 0.5.  The CDF at x = 10
    is printed and the figure is saved as 'pareto_cdf'.
    """
    n = 50
    upper = 10.0  # renamed from `max` to stop shadowing the builtin
    xs = [upper * i / n for i in range(n)]

    xmin = 0.5
    alpha = 1.0
    ps = [ParetoCdf(x, alpha, xmin) for x in xs]

    # Argument order is (x, alpha, xmin), as in the list above: the value
    # 10 named in the label is x, not xmin.
    print('Fraction <= 10', ParetoCdf(10, alpha, xmin))

    pyplot.clf()
    pyplot.plot(xs, ps, linewidth=2)
    myplot.Save('pareto_cdf',
                title='Pareto CDF',
                xlabel='x',
                ylabel='CDF',
                legend=False)
示例#19
0
def PlotReligiousSubset(years, cols, labels, i, j):
    """Plots columns i through j-1 against years and saves the figure.

    years: sequence of years
    cols: list of columns to plot
    labels: list of labels (corresponding to cols)
    i,j: slice indices of the columns to plot

    The output root is 'heri.religious.<i>.<j>'.
    """
    pyplot.clf()

    selected = zip(cols[i:j], labels[i:j])
    for col, label in selected:
        pyplot.plot(years, col, label=label,
                    linewidth=3, markersize=0, alpha=0.7)

    save_options = dict(
        formats=FORMATS,
        xlabel='Year',
        ylabel='% None',
        title='Religious preference',
    )
    myplot.Save(root='heri.religious.%d.%d' % (i, j), **save_options)
示例#20
0
def MakeExpoCdf():
    """Generates a plot of the exponential CDF.

    ExpoCdf is evaluated at 40 evenly spaced points from 0 up to (but not
    including) 2.5 with lam = 2.0.  The point -log(0.05) / lam and the CDF
    at that point are printed, and the figure is saved as 'expo_cdf'.
    """
    n = 40
    upper = 2.5  # renamed from `max` to stop shadowing the builtin
    xs = [upper * i / n for i in range(n)]

    lam = 2.0
    ps = [ExpoCdf(x, lam) for x in xs]

    percentile = -math.log(0.05) / lam
    # Argument order is (x, lam), matching the list comprehension above.
    print('Fraction <= ', percentile, ExpoCdf(percentile, lam))

    pyplot.clf()
    pyplot.plot(xs, ps, linewidth=2)
    myplot.Save('expo_cdf',
                title='Exponential CDF',
                xlabel='x',
                ylabel='CDF',
                legend=False)
示例#21
0
def PlotCurves(curves, root=None, clf=False):
    """Plots a set of curves.

    curves: list of curves; each curve is a list of (x, y) pairs
    root: passed to myplot.Save; when true the figure is cleared first
    clf: passed to myplot.Save

    Each curve goes through OffsetCurve (with its index and the number of
    curves) before being drawn in translucent blue.
    """
    if root:
        pyplot.clf()

    total = len(curves)
    for index, points in enumerate(curves):
        xs, ys = zip(*OffsetCurve(points, index, total))
        pyplot.plot(xs, ys, color='blue', alpha=0.2)

    save_options = dict(xlabel='# samples', ylabel='# taxa', legend=False)
    myplot.Save(root=root, clf=clf, **save_options)
示例#22
0
    def plot_data(self, root='caws.traffic'):
        """Makes a plot of AADT for each location.

        For every (loc, name) entry of self.locs and every year in
        self.years, self.lookup(year, loc) divided by 1000 is appended to
        the series for that name; one line is drawn per name.

        root: string prefix of the output files.
        """
        pyplot.clf()
        series = {}

        # items() works on both Python 2 and Python 3 dicts; iteritems()
        # is Python 2 only.
        for loc, name in self.locs.items():
            for year in self.years:
                # NOTE(review): on Python 2 `/` truncates when lookup
                # returns an int -- confirm which behaviour is wanted.
                adt = self.lookup(year, loc) / 1000
                series.setdefault(name, []).append(adt)

        # TODO: fix the year labels
        for name, adts in series.items():
            pyplot.plot(self.years, adts, label=name)

        myplot.Save(root=root,
                    title='Traffic volume',
                    xlabel='Year',
                    ylabel='AADT',
                    axis=[1991.5, 2002.5, 0, 160])
示例#23
0
def main():
    """Updates two uniform suites with evidence and plots the posteriors.

    'Door A' is updated with the evidence (3, 2) and 'Door C' with
    (3, 10).  The result of TotalProbability over the two suites is
    printed and both suites are plotted and saved under the root 'blinky'
    as PDF and PNG.
    """
    # %-formatting and a parenthesised single argument print the same text
    # on Python 2 and Python 3.
    print('pae %s' % (0.3 / (0.3 + 3.0 / 13),))

    doorA = MakeUniformSuite(0.0, 1.0, 101, name='Door A')
    evidence = 3, 2
    Update(doorA, evidence)

    doorC = MakeUniformSuite(0.0, 1.0, 101, name='Door C')
    evidence = 3, 10
    Update(doorC, evidence)

    print(TotalProbability(doorA, doorC, ProbWinning))

    # plot the posterior distributions
    myplot.Pmfs([doorA, doorC])
    myplot.Save(root='blinky',
                formats=['pdf', 'png'],
                title='Probability of blinking',
                xlabel='P(blink)',
                ylabel='Posterior probability')
def PlotMarginals(suite):
    """Plot the marginal distributions for a 2-D joint distribution.

    The two PMFs returned by ComputeMarginals are drawn as CDFs in
    side-by-side panels of one figure, which is saved under the root
    'bayes_height_marginals_' followed by suite.name.
    """
    pmf_m, pmf_s = ComputeMarginals(suite)

    pyplot.clf()
    pyplot.figure(1, figsize=(7, 4))

    # (marginal PMF, CDF name, x axis label), left panel first
    panels = [
        (pmf_m, 'mu', 'Mean height (cm)'),
        (pmf_s, 'sigma', 'Std Dev height (cm)'),
    ]
    for position, (marginal, name, xlabel) in enumerate(panels, 1):
        pyplot.subplot(1, 2, position)
        myplot.Cdf(Cdf.MakeCdfFromPmf(marginal, name))
        pyplot.xlabel(xlabel)
        pyplot.ylabel('CDF')

    myplot.Save(root='bayes_height_marginals_%s' % suite.name)
def MakeDiffFigure(firsts, others):
    """Plot the difference between the two PMFs."""
    # only this range of weeks is plotted; it is the range of interest
    weeks = range(35, 46)

    def percent_diff(week):
        # difference between the two probabilities, scaled by 100
        first_prob = firsts.pmf.Prob(week)
        other_prob = others.pmf.Prob(week)
        return 100 * (first_prob - other_prob)

    diffs = [percent_diff(week) for week in weeks]

    # start plotting
    pyplot.clf()
    # x values, the differences as bar heights, and the bar alignment
    pyplot.bar(weeks, diffs, align='center')
    myplot.Save(title='Difference in PMFs',
                root='nsfg_diffs',
                xlabel='weeks',
                ylabel='100 (PMF$_{first}$ - PMF$_{other}$)',
                legend=False)
示例#26
0
def main():
    """Updates a uniform prior with the evidence 60 and plots the posterior.

    The prior comes from MakeUniformSuite(1, 200, 200).  The result of
    CredibleInterval(posterior, 90) is printed and the posterior is saved
    under the root 'locomotive'.
    """
    upper_bound = 200
    prior = MakeUniformSuite(1, upper_bound, upper_bound)
    prior.name = 'prior'

    evidence = 60
    posterior = prior.Copy()
    Update(posterior, evidence)
    posterior.name = 'posterior'

    # a parenthesised single argument prints identically on Python 2 and 3
    print(CredibleInterval(posterior, 90))

    # plot the posterior distribution
    pyplot.subplots_adjust(wspace=0.4, left=0.15)
    plot_options = dict(linewidth=2)

    myplot.Pmf(posterior, **plot_options)
    myplot.Save(root='locomotive',
                title='Locomotive problem',
                xlabel='Number of trains',
                ylabel='Posterior probability')
示例#27
0
def PlotSimulations():
    """Plots four runs of PlotSimulation(100000) in a 2x2 grid.

    Each panel uses a log x axis.  The figure is saved under the root
    'world_record_sim2'.  The matplotlib rc settings changed here are
    reset with pyplot.rcdefaults() on the way out, even if plotting fails.
    """
    try:
        pyplot.rc('figure', figsize=(4, 4.5))
        pyplot.rc('font', size=9.0)
        pyplot.rc('xtick.major', size=0)
        pyplot.rc('ytick.major', size=0)

        pyplot.subplots_adjust(wspace=0.4,
                               hspace=0.4,
                               right=0.95,
                               left=0.1,
                               top=0.95,
                               bottom=0.05)

        pyplot.title('Simulated world records')

        for i in range(1, 5):
            pyplot.subplot(2, 2, i)
            pyplot.xscale('log')
            PlotSimulation(100000)

        myplot.Save(root='world_record_sim2')
    finally:
        # rc settings are global state; do not leak them to later figures
        pyplot.rcdefaults()
def PlotPosterior(xs, ys, suite, pcolor=False, contour=True):
    """Makes a contour and/or pseudocolor plot of a joint distribution.

    xs: sequence of values
    ys: sequence of values
    suite: Pmf that maps (x, y) to z
    pcolor: whether to draw a pcolor plot
    contour: whether to draw a contour plot

    The figure is saved under 'bayes_height_posterior_' + suite.name.
    """
    X, Y = numpy.meshgrid(xs, ys)

    def joint_prob(x, y):
        # probability the suite assigns to the pair (x, y)
        return suite.Prob((x, y))

    # evaluate the probability at every grid point
    Z = numpy.vectorize(joint_prob)(X, Y)

    pyplot.clf()
    if pcolor:
        pyplot.pcolor(X, Y, Z)
    if contour:
        pyplot.contour(X, Y, Z)

    root = 'bayes_height_posterior_%s' % suite.name
    myplot.Save(root=root,
                title='Posterior joint distribution',
                xlabel='Mean height (cm)',
                ylabel='Stddev (cm)')
def PlotCoefVariation(suites):
    """Plot the posterior distributions for CV.

    The PMF returned by ComputeCoefVariation for each suite is drawn as a
    CDF; the figure is saved under the root 'bayes_height_cv'.  Afterwards
    ProbBigger is printed for the 'female' and 'male' PMFs in both orders,
    so both labels must be present in suites.

    suites: map from label to Pmf of CVs.
    """
    pyplot.clf()

    pmfs = {}
    # items() works on both Python 2 and Python 3 dicts; iteritems() is
    # Python 2 only.
    for label, suite in suites.items():
        pmf = ComputeCoefVariation(suite)
        cdf = Cdf.MakeCdfFromPmf(pmf, label)
        myplot.Cdf(cdf)

        pmfs[label] = pmf

    myplot.Save(root='bayes_height_cv',
                title='Coefficient of variation',
                xlabel='cv',
                ylabel='CDF')

    # %-formatting prints the same text on Python 2 and 3
    print('female bigger %s' % (ProbBigger(pmfs['female'], pmfs['male']),))
    print('male bigger %s' % (ProbBigger(pmfs['male'], pmfs['female']),))
示例#30
0
def MakeNormalModel(values):
    """Plot the CDF of birthweights with a normal model.

    The model parameters come from thinkstats.TrimmedMeanVar(values,
    p=0.01); the model curve from RenderNormalCdf is drawn in grey and the
    CDF of the values themselves in red.  The figure is shown, and the
    estimated mean, variance and sigma are printed.

    values: sequence of values
    """
    # estimate parameters: trimming outliers yields a better fit
    mu, var = thinkstats.TrimmedMeanVar(values, p=0.01)
    # %-formatting prints the same text on Python 2 and 3
    print('Mean, Var %s %s' % (mu, var))

    # plot the model
    sigma = math.sqrt(var)
    print('Sigma %s' % (sigma,))
    xs, ps = RenderNormalCdf(mu, sigma, 200)

    pyplot.clf()
    pyplot.plot(xs, ps, label='model', linewidth=4, color='0.8')

    # plot the data
    cdf = Cdf.MakeCdfFromList(values)
    xs, ps = cdf.Render()
    pyplot.plot(xs, ps, label='data', linewidth=2, color='red')

    myplot.Save(show=True,
                ylabel='CDF')