def MakeFigures(df): """Generates CDFs and normal prob plots for weights and log weights.""" weights = df.wtkg2.dropna() log_weights = np.log10(weights) # plot weights on linear and log scales thinkplot.PrePlot(cols=2) MakeNormalModel(weights) thinkplot.Config(xlabel='adult weight (kg)', ylabel='CDF') thinkplot.SubPlot(2) MakeNormalModel(log_weights) thinkplot.Config(xlabel='adult weight (log10 kg)') thinkplot.Save(root='brfss_weight') # make normal probability plots on linear and log scales thinkplot.PrePlot(cols=2) MakeNormalPlot(weights) thinkplot.Config(xlabel='z', ylabel='weights (kg)') thinkplot.SubPlot(2) MakeNormalPlot(log_weights) thinkplot.Config(xlabel='z', ylabel='weights (log10 kg)') thinkplot.Save(root='brfss_weight_normal')
def main(filename='mystery0.dat'): data = ReadFile(filename) cdf = thinkstats2.Cdf(data) thinkplot.PrePlot(rows=2, cols=3) thinkplot.SubPlot(1) thinkplot.Cdf(cdf) thinkplot.Config(title='linear') thinkplot.SubPlot(2) scale = thinkplot.Cdf(cdf, xscale='log') thinkplot.Config(title='logx', **scale) thinkplot.SubPlot(3) scale = thinkplot.Cdf(cdf, transform='exponential') thinkplot.Config(title='expo', **scale) thinkplot.SubPlot(4) xs, ys = thinkstats2.NormalProbability(data) thinkplot.Plot(xs, ys) thinkplot.Config(title='normal') thinkplot.SubPlot(5) scale = thinkplot.Cdf(cdf, transform='pareto') thinkplot.Config(title='pareto', **scale) thinkplot.SubPlot(6) scale = thinkplot.Cdf(cdf, transform='weibull') thinkplot.Config(title='weibull', **scale) thinkplot.Show(legend=False)
def EstimateMarriageSurvivalByDecade(groups, **options): """Groups respondents by decade and plots survival curves. groups: GroupBy object """ thinkplot.PrePlot(len(groups)) for _, group in groups: _, sf = EstimateMarriageSurvival(group) thinkplot.Plot(sf, **options)
def AddLabelsByDecade(groups, **options): """Draws fake points in order to add labels to the legend. groups: GroupBy object """ thinkplot.PrePlot(len(groups)) for name, _ in groups: label = '%d0s' % name thinkplot.Plot([15], [1], label=label, **options)
def MakeParetoCdf(): """Generates a plot of the Pareto CDF.""" xmin = 0.5 thinkplot.PrePlot(3) for alpha in [2.0, 1.0, 0.5]: xs, ps = thinkstats2.RenderParetoCdf(xmin, alpha, 0, 10.0, n=100) thinkplot.Plot(xs, ps, label=r'$\alpha=%g$' % alpha) thinkplot.Save(root='analytic_pareto_cdf', title='Pareto CDF', xlabel='x', ylabel='CDF')
def MakeExpoCdf(): """Generates a plot of the exponential CDF.""" thinkplot.PrePlot(3) for lam in [2.0, 1, 0.5]: xs, ps = thinkstats2.RenderExpoCdf(lam, 0, 3.0, 50) label = r'$\lambda=%g$' % lam thinkplot.Plot(xs, ps, label=label) thinkplot.Save(root='analytic_expo_cdf', title='Exponential CDF', xlabel='x', ylabel='CDF')
def PlotConditionalSurvival(durations): """Plots conditional survival curves for a range of t0. durations: list of durations """ pmf = thinkstats2.Pmf(durations) times = [8, 16, 24, 32] thinkplot.PrePlot(len(times)) for t0 in times: sf = ConditionalSurvival(pmf, t0) label = 't0=%d' % t0 thinkplot.Plot(sf, label=label) thinkplot.Show()
def PlotPredictionsByDecade(groups, **options): """Groups respondents by decade and plots survival curves. groups: GroupBy object """ hfs = [] for _, group in groups: hf, sf = EstimateMarriageSurvival(group) hfs.append(hf) thinkplot.PrePlot(len(hfs)) for i, hf in enumerate(hfs): if i > 0: hf.Extend(hfs[i - 1]) sf = hf.MakeSurvival() thinkplot.Plot(sf, **options)
def MakeExampleNormalPlot(): """Generates a sample normal probability plot. """ n = 1000 thinkplot.PrePlot(3) mus = [0, 1, 5] sigmas = [1, 1, 2] for mu, sigma in zip(mus, sigmas): sample = np.random.normal(mu, sigma, n) xs, ys = thinkstats2.NormalProbability(sample) label = '$\mu=%d$, $\sigma=%d$' % (mu, sigma) thinkplot.Plot(xs, ys, label=label) thinkplot.Save(root='analytic_normal_prob_example', title='Normal probability plot', xlabel='standard normal sample', ylabel='sample values')
def MakeComparison(firsts, others): """Plots histograms of pregnancy length for first babies and others. firsts: DataFrame others: DataFrame """ first_hist = thinkstats2.Hist(firsts.prglngth, label='first') other_hist = thinkstats2.Hist(others.prglngth, label='other') width = 0.45 thinkplot.PrePlot(2) thinkplot.Hist(first_hist, align='right', width=width) thinkplot.Hist(other_hist, align='left', width=width) thinkplot.Save(root='first_nsfg_hist', title='Histogram', xlabel='weeks', ylabel='frequency', axis=[27, 46, 0, 2700])
def MakeBabyBoom(): """Plot CDF of interarrival time on log and linear scales. """ # compute the interarrival times df = ReadBabyBoom() diffs = df.minutes.diff() cdf = thinkstats2.Cdf(diffs, label='actual') thinkplot.PrePlot(cols=2) thinkplot.Cdf(cdf) thinkplot.Config(xlabel='minutes', ylabel='CDF', legend=False) thinkplot.SubPlot(2) thinkplot.Cdf(cdf, complement=True) thinkplot.Config(xlabel='minutes', ylabel='CCDF', yscale='log', legend=False) thinkplot.Save(root='analytic_interarrivals', legend=False)
def MakeNormalPlot(weights, term_weights): """Generates a normal probability plot of birth weights.""" mean, var = thinkstats2.TrimmedMeanVar(weights, p=0.01) std = math.sqrt(var) xs = [-4, 4] fxs, fys = thinkstats2.FitLine(xs, mean, std) thinkplot.Plot(fxs, fys, linewidth=4, color='0.8') thinkplot.PrePlot(2) xs, ys = thinkstats2.NormalProbability(weights) thinkplot.Plot(xs, ys, label='all live') xs, ys = thinkstats2.NormalProbability(term_weights) thinkplot.Plot(xs, ys, label='full term') thinkplot.Save(root='analytic_birthwgt_normal', title='Normal probability plot', xlabel='Standard deviations from mean', ylabel='Birth weight (lbs)')
def MakeNormalCdf(): """Generates a plot of the normal CDF.""" thinkplot.PrePlot(3) mus = [1.0, 2.0, 3.0] sigmas = [0.5, 0.4, 0.3] for mu, sigma in zip(mus, sigmas): xs, ps = thinkstats2.RenderNormalCdf(mu=mu, sigma=sigma, low=-1.0, high=4.0) label = r'$\mu=%g$, $\sigma=%g$' % (mu, sigma) thinkplot.Plot(xs, ps, label=label) thinkplot.Save(root='analytic_normal_cdf', title='Normal CDF', xlabel='x', ylabel='CDF', loc=2)
def PlotMarriageData(resp): """Plots hazard and survival functions. resp: DataFrame of respondents """ hf, sf = EstimateMarriageSurvival(resp) thinkplot.PrePlot(rows=2) thinkplot.Plot(hf) thinkplot.Config(ylabel='hazard', legend=False) thinkplot.SubPlot(2) thinkplot.Plot(sf) thinkplot.Save(root='survival2', xlabel='age (years)', ylabel='prob unmarried', ylim=[0, 1], legend=False, formats=FORMATS) return sf
def PlotSurvival(complete): """Plots survival and hazard curves. complete: list of complete lifetimes """ thinkplot.PrePlot(3, rows=2) cdf = thinkstats2.Cdf(complete, label='cdf') sf = MakeSurvivalFromCdf(cdf, label='survival') print(cdf[13]) print(sf[13]) thinkplot.Plot(sf) thinkplot.Cdf(cdf, alpha=0.2) thinkplot.Config() thinkplot.SubPlot(2) hf = sf.MakeHazardFunction(label='hazard') print(hf[39]) thinkplot.Plot(hf) thinkplot.Config(ylim=[0, 0.75])
def PlotHazard(complete, ongoing): """Plots the hazard function and survival function. complete: list of complete lifetimes ongoing: list of ongoing lifetimes """ # plot S(t) based on only complete pregnancies sf = MakeSurvivalFromSeq(complete) thinkplot.Plot(sf, label='old S(t)', alpha=0.1) thinkplot.PrePlot(2) # plot the hazard function hf = EstimateHazardFunction(complete, ongoing) thinkplot.Plot(hf, label='lams(t)', alpha=0.5) # plot the survival function sf = hf.MakeSurvival() thinkplot.Plot(sf, label='S(t)') thinkplot.Show(xlabel='t (weeks)')
def MakeNormalModel(weights): """Plot the CDF of birthweights with a normal model.""" # estimate parameters: trimming outliers yields a better fit mu, var = thinkstats2.TrimmedMeanVar(weights, p=0.01) print('Mean, Var', mu, var) # plot the model sigma = math.sqrt(var) print('Sigma', sigma) xs, ps = thinkstats2.RenderNormalCdf(mu, sigma, low=0, high=12.5) thinkplot.Plot(xs, ps, label='model', color='0.8') # plot the data cdf = thinkstats2.Cdf(weights, label='data') thinkplot.PrePlot(1) thinkplot.Cdf(cdf) thinkplot.Save(root='analytic_birthwgt_model', title='Birth weights', xlabel='birth weight (lbs)', ylabel='CDF')
def PlotRemainingLifetime(sf1, sf2): """Plots remaining lifetimes for pregnancy and age at first marriage. sf1: SurvivalFunction for pregnancy length sf2: SurvivalFunction for age at first marriage """ thinkplot.PrePlot(cols=2) rem_life1 = sf1.RemainingLifetime() thinkplot.Plot(rem_life1) thinkplot.Config(title='remaining pregnancy length', xlabel='weeks', ylabel='mean remaining weeks') thinkplot.SubPlot(2) func = lambda pmf: pmf.Percentile(50) rem_life2 = sf2.RemainingLifetime(filler=np.inf, func=func) thinkplot.Plot(rem_life2) thinkplot.Config(title='years until first marriage', ylim=[0, 15], xlim=[11, 31], xlabel='age (years)', ylabel='median remaining years') thinkplot.Save(root='survival6', formats=FORMATS)