Пример #1
0
def MakeGaussianModel(weights):
    """Draws the empirical CDF of `weights` on top of a fitted Gaussian CDF.

    The Gaussian parameters come from thinkstats2.TrimmedMeanVar, and the
    model curve is rendered from four standard deviations below the mean to
    four standard deviations above it.  The sample size and the fitted
    parameters are printed as a side effect.

    weights: sequence
    """
    data_cdf = thinkstats2.Cdf(weights, label='weights')

    # fit the model
    mean, var = thinkstats2.TrimmedMeanVar(weights)
    std = math.sqrt(var)
    print('n, mean, std', len(weights), mean, std)

    # range of the model curve: mean +/- 4 standard deviations
    spread = 4 * std
    low, high = mean - spread, mean + spread
    model_xs, model_ps = thinkstats2.RenderGaussianCdf(mean, std, low, high)

    # the model is drawn first, then the data
    thinkplot.Plot(model_xs,
                   model_ps,
                   label='model',
                   linewidth=4,
                   color='0.8')
    thinkplot.Cdf(data_cdf)
Пример #2
0
def MakeFigures():
    """Plots the CDF of populations in several forms.

    On a log-log scale the tail of the CCDF looks like a straight line,
    which suggests a Pareto distribution, but that turns out to be misleading.

    On a log-x scale the distribution has the characteristic sigmoid of
    a lognormal distribution.

    The normal probability plot of log(sizes) confirms that the data fit the
    lognormal model very well.

    Many phenomena that have been described with Pareto models can be described
    as well, or better, with lognormal models.

    Reads the populations with ReadData(), prints how many there are, and
    saves two figures through thinkplot.Save: 'populations_pareto' (CCDF of
    log10 population with a Pareto model) and 'populations_normal' (CDF of
    log10 population with a Gaussian model, next to a normal probability
    plot).
    """
    pops = ReadData()
    print('Number of cities/towns', len(pops))

    # every plot below uses log10(population); a CDF of the raw populations
    # is never drawn, so none is built
    log_pops = np.log10(pops)
    cdf_log = thinkstats2.Cdf(log_pops, label='data')

    # pareto plot
    # the model is drawn as 1 - ys so that it lines up with the
    # complementary CDF of the data
    xs, ys = thinkstats2.RenderParetoCdf(xmin=5000, alpha=1.4, low=0, high=1e7)
    thinkplot.Plot(np.log10(xs), 1 - ys, label='model', color='0.8')

    thinkplot.Cdf(cdf_log, complement=True)
    thinkplot.Config(xlabel='log10 population', ylabel='CCDF', yscale='log')
    thinkplot.Save(root='populations_pareto')

    # lognormal plot
    thinkplot.PrePlot(cols=2)

    # a Gaussian fitted to log10(population) is a lognormal model of the
    # populations themselves
    mu, sigma = log_pops.mean(), log_pops.std()
    xs, ps = thinkstats2.RenderGaussianCdf(mu, sigma, low=0, high=8)
    thinkplot.Plot(xs, ps, label='model', color='0.8')

    thinkplot.Cdf(cdf_log)
    thinkplot.Config(xlabel='log10 population', ylabel='CDF')

    # second panel: normal probability plot of the log-transformed data
    thinkplot.SubPlot(2)
    thinkstats2.NormalProbabilityPlot(log_pops, label='data')
    thinkplot.Config(xlabel='z', ylabel='log10 population', xlim=[-5, 5])

    thinkplot.Save(root='populations_normal')
Пример #3
0
def MakeFigures(df):
    """Plots the CDF of income in several forms.

    Saves three figures through thinkplot.Save: 'hinc_linear' (CDF of income),
    'hinc_pareto' (CCDF with a Pareto model on log-log axes) and
    'hinc_normal' (CDF of log10 income with a Gaussian model).  The median
    and standard deviation used for the Gaussian model are printed.

    df: object whose `income` and `ps` attributes expose `.values`
        (presumably a pandas DataFrame -- confirm against the caller)
    """
    xs, ps = df.income.values, df.ps.values
    cdf = SmoothCdf(xs, ps, label='data')
    cdf_log = SmoothCdf(np.log10(xs), ps, label='data')

    # linear plot
    thinkplot.Cdf(cdf)
    thinkplot.Save(root='hinc_linear', xlabel='household income', ylabel='CDF')

    # pareto plot
    # for the model I chose parameters by hand to fit the tail
    xs, ys = thinkstats2.RenderParetoCdf(xmin=55000,
                                         alpha=2.5,
                                         low=0,
                                         high=250000)
    thinkplot.Plot(xs, 1 - ys, label='model', color='0.8')

    # NOTE(review): the x values here are raw incomes drawn with xscale='log',
    # not log10-transformed values, so the 'log10 household income' label
    # looks inaccurate; it is left unchanged to keep the figure as published
    thinkplot.Cdf(cdf, complement=True)
    thinkplot.Save(root='hinc_pareto',
                   xlabel='log10 household income',
                   ylabel='CCDF',
                   xscale='log',
                   yscale='log')

    # lognormal plot
    # the model is centered on the median of log10(income)
    median = cdf_log.Percentile(50)

    # std is chosen by hand to match the upper tail; it replaces the
    # percentile-based estimate IQR / 1.349, which used to be computed here
    # and then immediately overwritten
    std = 0.35
    print(median, std)

    xs, ps = thinkstats2.RenderGaussianCdf(median, std, low=3.5, high=5.5)
    thinkplot.Plot(xs, ps, label='model', color='0.8')

    thinkplot.Cdf(cdf_log)
    thinkplot.Save(root='hinc_normal',
                   xlabel='log10 household income',
                   ylabel='CDF')
Пример #4
0
def MakeGaussianCdf():
    """Saves a figure showing the Gaussian CDF for three parameter pairs.

    Each curve is rendered between -1 and 4 and labeled with its mu and
    sigma; the figure is saved with root 'analytic_gaussian_cdf'.
    """
    # one (mu, sigma) pair per curve
    params = [(1.0, 0.5), (2.0, 0.4), (3.0, 0.3)]

    thinkplot.PrePlot(len(params))

    for pair in params:
        mu, sigma = pair
        xs, ps = thinkstats2.RenderGaussianCdf(
            mu=mu, sigma=sigma, low=-1.0, high=4.0)
        thinkplot.Plot(xs, ps, label='mu=%g, sigma=%g' % pair)

    options = dict(title='Gaussian CDF', xlabel='x', ylabel='CDF', loc=2)
    thinkplot.Save(root='analytic_gaussian_cdf', **options)
Пример #5
0
def MakeGaussianModel(weights):
    """Saves a figure comparing the CDF of birth weights to a Gaussian model.

    The model parameters are estimated with thinkstats2.TrimmedMeanVar
    (p=0.01) and printed along the way; the figure is saved with root
    'analytic_birthwgt_model'.

    weights: sequence of birth weights
    """
    # fit the model: trimming outliers yields a better fit
    mean, variance = thinkstats2.TrimmedMeanVar(weights, p=0.01)
    print('Mean, Var', mean, variance)

    stddev = math.sqrt(variance)
    print('Sigma', stddev)

    # draw the model curve
    model_xs, model_ps = thinkstats2.RenderGaussianCdf(
        mean, stddev, low=0, high=12.5)
    thinkplot.Plot(model_xs, model_ps, label='model', color='0.8')

    # draw the data
    data_cdf = thinkstats2.Cdf(weights, label='data')

    # NOTE(review): PrePlot is called after the model has been plotted,
    # presumably to restart thinkplot's color sequence for the data curve --
    # confirm before moving it
    thinkplot.PrePlot(1)
    thinkplot.Cdf(data_cdf)

    labels = dict(title='Birth weights',
                  xlabel='birth weight (lbs)',
                  ylabel='CDF')
    thinkplot.Save(root='analytic_birthwgt_model', **labels)