def MakeFigures(firsts, others):
    """Plot and save histograms, PMFs and PMF differences of pregnancy length.

    Three figures are written via thinkplot.Save, with roots 'nsfg_hist',
    'nsfg_pmf' and 'nsfg_diffs'.

    firsts: DataFrame whose `prglngth` values are plotted with label 'first'
    others: DataFrame whose `prglngth` values are plotted with label 'other'
    """
    hist_first = thinkstats2.MakeHistFromList(firsts.prglngth)
    pmf_first = thinkstats2.MakePmfFromHist(hist_first)
    hist_other = thinkstats2.MakeHistFromList(others.prglngth)
    pmf_other = thinkstats2.MakePmfFromHist(hist_other)

    # The two series use widths of opposite sign; presumably this makes
    # thinkplot.Hist draw them on either side of each week so the bars sit
    # next to each other -- confirm against thinkplot.Hist.
    bar_width = 0.4
    opts_first = {'label': 'first', 'width': -bar_width}
    opts_other = {'label': 'other', 'width': bar_width}

    # Figure 1: raw frequencies.
    thinkplot.PrePlot(2)
    thinkplot.Hist(hist_first, **opts_first)
    thinkplot.Hist(hist_other, **opts_other)
    thinkplot.Save(root='nsfg_hist',
                   title='Histogram',
                   xlabel='weeks',
                   ylabel='frequency',
                   axis=[27, 46, 0, 2700])

    # Figure 2: the same bars, drawn from the PMFs instead of the counts.
    thinkplot.PrePlot(2)
    thinkplot.Hist(pmf_first, **opts_first)
    thinkplot.Hist(pmf_other, **opts_other)
    thinkplot.Save(root='nsfg_pmf',
                   title='PMF',
                   xlabel='weeks',
                   ylabel='probability',
                   axis=[27, 46, 0, 0.6])

    # Figure 3: difference between the PMFs for weeks 35..45, scaled by 100
    # so it reads as percentage points.
    weeks = range(35, 46)
    diffs = [100 * (pmf_first.Prob(week) - pmf_other.Prob(week))
             for week in weeks]

    thinkplot.PrePlot(1)
    thinkplot.Bar(weeks, diffs, align='center')
    thinkplot.Save(root='nsfg_diffs',
                   title='Difference in PMFs',
                   xlabel='weeks',
                   ylabel='percentage points',
                   legend=False)
def pmf_diff_in_percentage(first_pmf, other_pmf):
    """Show a bar chart of first_pmf minus other_pmf, in percentage points.

    For each week from 35 to 45 the difference of the two probabilities is
    multiplied by 100 and drawn with thinkplot.Bar, then displayed with
    thinkplot.Show.

    first_pmf: object with a Prob(week) method
    other_pmf: object with a Prob(week) method

    Note carried over from the original docstring: this figure makes the
    pattern clearer -- first babies are less likely to be born in week 39,
    and somewhat more likely to be born in weeks 41 and 42.
    """
    # Only the weeks near the mode are compared (per the original comment).
    weeks = range(35, 46)
    diffs = [100 * (first_pmf.Prob(week) - other_pmf.Prob(week))
             for week in weeks]

    thinkplot.Bar(weeks, diffs)
    thinkplot.Show(xlabel="Weeks", ylabel="pmf % diff")
def MakeFigures(firsts, others):
    """Plot and save PMFs of pregnancy length and their differences.

    Two figures are written via thinkplot.Save, with roots
    'probability_nsfg_pmf' and 'probability_nsfg_diffs'.

    firsts: DataFrame whose `prglngth` values are plotted with label 'first'
    others: DataFrame whose `prglngth` values are plotted with label 'other'
    """
    pmf_first = thinkstats2.Pmf(firsts.prglngth, label='first')
    pmf_other = thinkstats2.Pmf(others.prglngth, label='other')
    bar_width = 0.45

    # First subplot (cols=2 requests a two-column layout): both PMFs as
    # bars, aligned on opposite sides of each week.
    thinkplot.PrePlot(2, cols=2)
    thinkplot.Hist(pmf_first, align='right', width=bar_width)
    thinkplot.Hist(pmf_other, align='left', width=bar_width)
    thinkplot.Config(xlabel='weeks',
                     ylabel='probability',
                     axis=[27, 46, 0, 0.6])

    # Second subplot: the same two PMFs drawn with thinkplot.Pmfs, then the
    # whole figure is saved.
    thinkplot.PrePlot(2)
    thinkplot.SubPlot(2)
    thinkplot.Pmfs([pmf_first, pmf_other])
    thinkplot.Save(root='probability_nsfg_pmf',
                   xlabel='weeks',
                   axis=[27, 46, 0, 0.6])

    # Difference between the PMFs for weeks 35..45, scaled by 100 so it
    # reads as percentage points.
    weeks = range(35, 46)
    diffs = [100 * (pmf_first.Prob(week) - pmf_other.Prob(week))
             for week in weeks]

    thinkplot.Bar(weeks, diffs)
    thinkplot.Save(root='probability_nsfg_diffs',
                   title='Difference in PMFs',
                   xlabel='weeks',
                   ylabel='percentage points',
                   legend=False)
thinkplot.PrePlot(2) thinkplot.Pmfs([first_pmf, others_pmf]) thinkplot.show(xlabl='week', ylabel='probability', axis=[27, 46, 0, 0.6]) # %% [markdown] # ## 3.3 その他の可視化 # %% # 差を棒グラフで表示 weeks = range(35, 46) diffs = [] for week in weeks: p1 = first_pmf.Prob(week) p2 = others_pmf.Prob(week) diff = 100 * (p1 - p2) diffs.append(diff) thinkplot.Bar(weeks, diffs) # %% [markdown] # ## 3.4 クラスサイズのパラドックス # %% d = {7: 8, 12: 8, 17: 14, 22: 4, 27: 6, 32: 12, 37: 8, 42: 3, 47: 2} pmf = thinkstats2.Pmf(d, label='actual') print('mean of the actual pmf:', pmf.Mean()) # %% def BiasPmf(pmf, label): """バイアスされたPmfを推定する Arguments: pmf {Pmf} -- actual pmf label {string} -- label Returns: