def MakeFigures(df):
    """Plots weights and log10 weights as CDFs and as normal probability plots.

    Two figures are saved, each with two columns: the first panel shows the
    raw weights and the second shows their base-10 logarithm.

    'brfss_weight' is drawn with MakeNormalModel and 'brfss_weight_normal'
    is drawn with MakeNormalPlot.

    df: DataFrame with a `wtkg2` column; missing values are dropped
    """
    kg = df.wtkg2.dropna()
    log_kg = np.log10(kg)

    # one row per saved figure:
    # (file root, drawing function, Config options for panel 1, for panel 2)
    recipes = [
        ('brfss_weight', MakeNormalModel,
         dict(xlabel='adult weight (kg)', ylabel='CDF'),
         dict(xlabel='adult weight (log10 kg)')),
        ('brfss_weight_normal', MakeNormalPlot,
         dict(xlabel='z', ylabel='weights (kg)'),
         dict(xlabel='z', ylabel='weights (log10 kg)')),
    ]

    for root, draw, linear_options, log_options in recipes:
        thinkplot.PrePlot(cols=2)
        draw(kg)
        thinkplot.Config(**linear_options)

        thinkplot.SubPlot(2)
        draw(log_kg)
        thinkplot.Config(**log_options)

        thinkplot.Save(root=root)
def main(filename='mystery0.dat'):
    """Shows the data from a file in six panels, one per scale or transform.

    The panels form a 2x3 grid titled linear, logx, expo, normal, pareto and
    weibull.  The normal panel plots the result of
    thinkstats2.NormalProbability; every other panel plots the CDF.

    filename: passed to ReadFile to load the data
    """
    def cdf_panel(index, title, **cdf_options):
        """Draws the CDF in panel `index`, forwarding the options Cdf returns."""
        thinkplot.SubPlot(index)
        axis_options = thinkplot.Cdf(cdf, **cdf_options)
        thinkplot.Config(title=title, **axis_options)

    sample = ReadFile(filename)
    cdf = thinkstats2.Cdf(sample)

    thinkplot.PrePlot(rows=2, cols=3)

    # the linear panel does not forward the return value of Cdf to Config
    thinkplot.SubPlot(1)
    thinkplot.Cdf(cdf)
    thinkplot.Config(title='linear')

    cdf_panel(2, 'logx', xscale='log')
    cdf_panel(3, 'expo', transform='exponential')

    # the normal panel is built from the raw data rather than the Cdf
    thinkplot.SubPlot(4)
    norm_xs, norm_ys = thinkstats2.NormalProbability(sample)
    thinkplot.Plot(norm_xs, norm_ys)
    thinkplot.Config(title='normal')

    cdf_panel(5, 'pareto', transform='pareto')
    cdf_panel(6, 'weibull', transform='weibull')

    thinkplot.Show(legend=False)
def MakeBabyBoom():
    """Plots the CDF of interarrival times and its complement on a log-y scale.

    The first panel shows the CDF; the second shows the complementary CDF
    with yscale='log'.  The figure is saved with root
    'analytic_interarrivals'.
    """
    # interarrival times are the differences of the `minutes` column
    babyboom = ReadBabyBoom()
    interarrivals = babyboom.minutes.diff()
    actual_cdf = thinkstats2.Cdf(interarrivals, label='actual')

    # every Config/Save call below suppresses the legend
    no_legend = dict(legend=False)

    thinkplot.PrePlot(cols=2)

    # first panel: CDF
    thinkplot.Cdf(actual_cdf)
    thinkplot.Config(xlabel='minutes', ylabel='CDF', **no_legend)

    # second panel: complementary CDF
    thinkplot.SubPlot(2)
    thinkplot.Cdf(actual_cdf, complement=True)
    thinkplot.Config(xlabel='minutes', ylabel='CCDF', yscale='log',
                     **no_legend)

    thinkplot.Save(root='analytic_interarrivals', **no_legend)
def PlotMarriageData(resp):
    """Plots the hazard and survival functions estimated from respondents.

    The hazard function goes in the first of two rows and the survival
    function in the second; the figure is saved with root 'survival2'.

    resp: DataFrame of respondents

    returns: the survival function returned by EstimateMarriageSurvival
    """
    hazard, survival = EstimateMarriageSurvival(resp)

    # first row: hazard function
    thinkplot.PrePlot(rows=2)
    thinkplot.Plot(hazard)
    thinkplot.Config(ylabel='hazard', legend=False)

    # second row: survival function, configured through Save
    thinkplot.SubPlot(2)
    thinkplot.Plot(survival)

    save_options = dict(
        xlabel='age (years)',
        ylabel='prob unmarried',
        ylim=[0, 1],
        legend=False,
        formats=FORMATS,
    )
    thinkplot.Save(root='survival2', **save_options)

    return survival
def PlotSurvival(complete):
    """Plots the survival curve over its CDF, with the hazard function below.

    Also prints the CDF and survival function at 13 and the hazard function
    at 39.  The figure is neither saved nor shown here.

    complete: list of complete lifetimes
    """
    thinkplot.PrePlot(3, rows=2)

    lifetime_cdf = thinkstats2.Cdf(complete, label='cdf')
    survival = MakeSurvivalFromCdf(lifetime_cdf, label='survival')

    # spot-check both curves at the same point
    for curve in (lifetime_cdf, survival):
        print(curve[13])

    # first row: survival curve with the CDF drawn faintly behind it
    thinkplot.Plot(survival)
    thinkplot.Cdf(lifetime_cdf, alpha=0.2)
    thinkplot.Config()

    # second row: hazard function derived from the survival curve
    thinkplot.SubPlot(2)
    hazard = survival.MakeHazardFunction(label='hazard')
    print(hazard[39])
    thinkplot.Plot(hazard)
    thinkplot.Config(ylim=[0, 0.75])
def PlotRemainingLifetime(sf1, sf2):
    """Plots remaining lifetime for pregnancy length and age at first marriage.

    The first panel plots sf1.RemainingLifetime() with its default
    arguments; the second plots sf2.RemainingLifetime() summarized by the
    50th percentile.  The figure is saved with root 'survival6'.

    sf1: SurvivalFunction for pregnancy length
    sf2: SurvivalFunction for age at first marriage
    """
    def median(pmf):
        """Returns the 50th percentile of pmf."""
        return pmf.Percentile(50)

    thinkplot.PrePlot(cols=2)

    # first panel: pregnancy length
    pregnancy_remaining = sf1.RemainingLifetime()
    thinkplot.Plot(pregnancy_remaining)
    thinkplot.Config(title='remaining pregnancy length',
                     xlabel='weeks',
                     ylabel='mean remaining weeks')

    # second panel: age at first marriage
    # NOTE(review): filler=np.inf is presumably the value substituted where
    # the remaining lifetime is undefined -- confirm against
    # SurvivalFunction.RemainingLifetime
    thinkplot.SubPlot(2)
    marriage_remaining = sf2.RemainingLifetime(filler=np.inf, func=median)
    thinkplot.Plot(marriage_remaining)
    thinkplot.Config(title='years until first marriage',
                     ylim=[0, 15],
                     xlim=[11, 31],
                     xlabel='age (years)',
                     ylabel='median remaining years')

    thinkplot.Save(root='survival6', formats=FORMATS)