Пример #1
0
def main():
    """Summarize and plot the male and female samples from the BRFSS data.

    Loads respondent records (``data_dir='res'``), takes the per-sex lists
    returned by ``SummarizeHeight`` under keys 1 and 2, prints the trimmed
    mean, variance, standard deviation and coefficient of variation of each,
    and then shows, one figure at a time, the histogram and the PMF of each
    sample followed by both CDFs on a single figure.
    """
    def show_and_clear():
        # Display the current figure, then wipe it before the next plot.
        myplot.Show()
        myplot.Clf()

    respondents = brfss.Respondents()
    respondents.ReadRecords(data_dir='res')
    by_sex = respondents.SummarizeHeight()

    # Key 1 is treated as the male sample and key 2 as the female sample.
    men, women = by_sex[1], by_sex[2]

    # Male mu, var, sigma and coefficient of variation (CV).
    mu_m, var_m = thinkstats.TrimmedMeanVar(men)
    sd_m = math.sqrt(var_m)
    cv_m = sd_m / mu_m
    print("man: mu = %.3f, var = %.3f, sigma = %.3f, cv = %.3f" % (mu_m, var_m, sd_m, cv_m))

    # Female mu, var, sigma and coefficient of variation (CV).
    mu_f, var_f = thinkstats.TrimmedMeanVar(women)
    sd_f = math.sqrt(var_f)
    cv_f = sd_f / mu_f
    print("lady: mu = %.3f, var = %.3f, sigma = %.3f, cv = %.3f" % (mu_f, var_f, sd_f, cv_f))

    # Histograms, one figure per sex.
    hist_m = Pmf.MakeHistFromList(men, name='man hist')
    myplot.Hist(hist_m)
    show_and_clear()

    hist_f = Pmf.MakeHistFromList(women, name='lady hist')
    myplot.Hist(hist_f)
    show_and_clear()

    # PMFs derived from the histograms, one figure per sex.
    pmf_m = Pmf.MakePmfFromHist(hist_m, name='man pmf')
    myplot.Pmf(pmf_m)
    show_and_clear()

    pmf_f = Pmf.MakePmfFromHist(hist_f, name='lady pmf')
    myplot.Pmf(pmf_f)
    show_and_clear()

    # Cumulative distributions of both sexes on the same figure.
    cdf_m = Cdf.MakeCdfFromPmf(pmf_m, name='man cdf')
    cdf_f = Cdf.MakeCdfFromPmf(pmf_f, name='lady cdf')
    myplot.Cdfs((cdf_m, cdf_f), complement=False, transform=None)
    myplot.Show()
Пример #2
0
def MakeNormalPlot(values):
    """Draw a normal probability plot for a sample.

    The current figure is cleared, simulated data is drawn by
    PlotSimulatedData using the trimmed mean and standard deviation of the
    sample, and the sorted sample is then plotted as red dots against a
    sorted set of variates from GenerateNormalVariates(0, 1, n).  The figure
    is displayed through myplot.Save(show=True, ...).

    Args:
        values: sequence of values
    """
    # TODO: when n is small, generate a larger sample and desample
    pyplot.clf()

    # Estimate the parameters from the sample (trimming with p=0.01).
    mean, variance = thinkstats.TrimmedMeanVar(values, p=0.01)
    std_dev = math.sqrt(variance)
    sample_size = len(values)

    # Simulated data for comparison, drawn with the estimated parameters.
    PlotSimulatedData(mean, std_dev, sample_size)

    # The real data: sorted sample against sorted reference variates.
    reference = GenerateNormalVariates(0, 1, sample_size)
    pyplot.plot(sorted(reference), sorted(values), 'r.', markersize=3)

    myplot.Save(show=True, xlabel='Standard normal values', legend=False)
Пример #3
0
    def MakeNormalModel(self, weights, root,
                        xmax=175,
                        xlabel='adult weight (kg)',
                        axis=None):
        """Plot the CDF of the weights together with a normal model.

        The model parameters are the trimmed mean and variance of the
        weights; they are printed along with the sample size and sigma.
        The figure is handed to myplot.Save under the name ``root``.

        Args:
            weights: sequence of weights; it must support slicing and the
                slice must support ``sort()`` (e.g. a list)
            root: first argument passed to myplot.Save
            xmax: upper limit passed to continuous.RenderNormalCdf and used
                for the default x-axis range
            xlabel: label for the x axis
            axis: axis limits for the figure; when falsy,
                ``[0, xmax, 0, 1]`` is used
        """
        data_cdf = Cdf.MakeCdfFromList(weights)

        pyplot.clf()

        # Estimate the parameters from a sorted slice of the weights.
        ordered = weights[:]
        ordered.sort()
        mean, variance = thinkstats.TrimmedMeanVar(ordered)
        print('n, Mean, Var', len(weights), mean, variance)

        std_dev = math.sqrt(variance)
        print('Sigma', std_dev)

        # Thick grey curve: the normal model.
        model_xs, model_ps = continuous.RenderNormalCdf(mean, std_dev, xmax)
        pyplot.plot(model_xs, model_ps, label='model', linewidth=4, color='0.7')

        # Thinner blue curve: the empirical CDF.
        data_xs, data_ps = data_cdf.Render()
        pyplot.plot(data_xs, data_ps, label='data', linewidth=2, color='blue')

        if not axis:
            axis = [0, xmax, 0, 1]

        myplot.Save(root,
                    title='Adult weight',
                    xlabel=xlabel,
                    ylabel='CDF',
                    axis=axis)
Пример #4
0
    def SummarizeHeight(self):
        """Print summary statistics for male and female height.

        Collects the ``htm3`` value of every record in ``self.records`` whose
        ``htm3`` is not the string 'NA', grouped by the record's ``sex`` code
        (1 or 2) and also in a combined 'all' group.  For each group one line
        is printed with the key, the number of values, the trimmed mean, the
        variance, the standard deviation and the coefficient of variation.

        Returns:
            The dictionary mapping 1, 2 and 'all' to the list of heights in
            that group, so callers can reuse the grouped data.

        Raises:
            KeyError: if a record has a ``sex`` code other than 1 or 2.
        """
        # make a dictionary that maps from gender code to list of heights
        d = {1: [], 2: [], 'all': []}
        for r in self.records:
            height = r.htm3
            if height != 'NA':
                d[r.sex].append(height)
                d['all'].append(height)

        # items() and a single pre-formatted string argument behave the same
        # on Python 2 and Python 3, unlike iteritems() and the print statement.
        for key, t in d.items():
            mu, var = thinkstats.TrimmedMeanVar(t)
            sigma = math.sqrt(var)
            cv = sigma / mu
            print('%s %s %s %s %s %s' % (key, len(t), mu, var, sigma, cv))

        return d
Пример #5
0
    def SummarizeWeight(self):
        """Print summary statistics for male and female weight.

        Collects the ``weight2`` value of every record in ``self.records``
        whose ``weight2`` is not the string 'NA', grouped by the record's
        ``sex`` code (1 or 2) and also in a combined 'all' group.  After a
        two-line header, one line is printed per group with the key, the
        number of values, the trimmed mean, the variance, the standard
        deviation and the coefficient of variation.

        Raises:
            KeyError: if a record has a ``sex`` code other than 1 or 2.
        """
        # make a dictionary that maps from gender code to list of weights
        d = {1: [], 2: [], 'all': []}
        # A plain loop fills both groups in one pass instead of building
        # throwaway lists of None with side-effect comprehensions.
        for r in self.records:
            weight = r.weight2
            if weight != 'NA':
                d[r.sex].append(weight)
                d['all'].append(weight)

        print('Weight (kg):')
        print('key n     mean     var    sigma     cv')
        for key, t in d.items():
            mu, var = thinkstats.TrimmedMeanVar(t)
            sigma = math.sqrt(var)
            cv = sigma / mu
            print(key, len(t), mu, var, sigma, cv)
Пример #6
0
def MakeNormalModel(values):
    """Plot the CDF of birthweights with a normal model.

    The model parameters are the trimmed mean and variance of ``values``
    (p=0.01); they are printed together with sigma.  The model curve and
    the empirical CDF are drawn on a cleared figure, which is then shown
    through myplot.Save(show=True, ...).

    Args:
        values: sequence of values to model
    """
    # estimate parameters: trimming outliers yields a better fit
    mu, var = thinkstats.TrimmedMeanVar(values, p=0.01)
    # A single pre-formatted argument prints the same text on Python 2 and
    # Python 3; the bare print statement is a syntax error on Python 3.
    print('Mean, Var %s %s' % (mu, var))

    # plot the model
    sigma = math.sqrt(var)
    print('Sigma %s' % (sigma,))
    xs, ps = RenderNormalCdf(mu, sigma, 200)

    pyplot.clf()
    pyplot.plot(xs, ps, label='model', linewidth=4, color='0.8')

    # plot the data
    cdf = Cdf.MakeCdfFromList(values)
    xs, ps = cdf.Render()
    pyplot.plot(xs, ps, label='data', linewidth=2, color='red')

    myplot.Save(show=True,
                ylabel='CDF')
Пример #7
0
def MakeNormalModel(weights):
    """Plot the CDF of birth weights alongside a fitted normal model.

    The model uses the trimmed mean and variance of ``weights`` (p=0.01),
    which are printed together with sigma.  The model curve and the
    empirical CDF are drawn on a cleared figure, which is passed to
    myplot.Save under the name 'nsfg_birthwgt_model'.

    Args:
        weights: sequence of birth weights
    """
    # Trimming outliers before estimating yields a better fit.
    mean, variance = thinkstats.TrimmedMeanVar(weights, p=0.01)
    print('Mean, Var', mean, variance)

    std_dev = math.sqrt(variance)
    print('Sigma', std_dev)

    # Thick light-grey curve: the normal model.
    model_xs, model_ps = RenderNormalCdf(mean, std_dev, 200)
    pyplot.clf()
    pyplot.plot(model_xs, model_ps, label='model', linewidth=4, color='0.8')

    # Thinner blue curve: the empirical CDF of the data.
    data_xs, data_ps = Cdf.MakeCdfFromList(weights).Render()
    pyplot.plot(data_xs, data_ps, label='data', linewidth=2, color='blue')

    labels = dict(title='Birth weights',
                  xlabel='birth weight (oz)',
                  ylabel='CDF')
    myplot.Save('nsfg_birthwgt_model', **labels)