def main():
    '''Report gestation-period spread, summarise pumpkin weights, show plots.

    Loads the pregnancies dataset, keeps the records whose outcome equals 1,
    splits them into first-born and subsequent babies, and prints the
    variance and standard deviation of 'prglength' for each group. It then
    prints the mean, variance and standard deviation returned for the
    pumpkin weights and finally displays the three charts.
    '''
    dataset = first.get_pregnancies_dataset()
    # Outcome code 1 is presumably "live birth" (matches the naming used
    # throughout this file) -- confirm against the dataset codebook.
    live_births = [rec for rec in dataset.records if rec.outcome == 1]
    first_born, later_born = first.split_one_vs_all(live_births, birthord=1)

    first_lengths = first.get_values_for_field(first_born, 'prglength')
    later_lengths = first.get_values_for_field(later_born, 'prglength')
    first_variance = thinkstats.Var(first_lengths)
    later_variance = thinkstats.Var(later_lengths)

    sd_line = '\tAnd the sd is {} weeks.'

    first_line = 'Variance of the first children gestation periods: {} weeks^2'
    print(first_line.format(first_variance))
    print(sd_line.format(math.sqrt(first_variance)))

    later_line = ('Variance of the subsequent children gestation periods: {} '
                  'weeks^2')
    print(later_line.format(later_variance))
    print(sd_line.format(math.sqrt(later_variance)))

    pumpkin_weights = get_pumpkin_weights()
    intro = 'Finding the variance and mean for these pumpkins: {}'
    print(intro.format(pumpkin_weights))
    mean, variance = pumpkins(pumpkin_weights)
    std_dev = math.sqrt(variance)
    print('\tmean = {}\tvariance = {}\tsd = {}'.format(mean, variance, std_dev))

    # Each plotting helper receives the full dataset and does its own
    # filtering.
    bar_chart(dataset)
    plot_pregnancy_pmf(dataset)
    plot_pregnancies_pmf_differences(dataset)
def plot_pregnancies_pmf_differences(prg, week_lo=35, week_hi=46):
    '''Plot, per week, how first-baby and subsequent-baby PMFs differ.

    For every week in the half-open range [week_lo, week_hi) a bar of height
    100 * (P_first(week) - P_subsequent(week)) is drawn, where both
    probabilities come from PMFs built over the 'prglength' of live
    pregnancies.

    Args:
        prg: The pregnancies dataset; it is handed to
            first.get_live_pregnancies.
        week_lo(int): First week to plot (inclusive).
        week_hi(int): End of the plotted range (exclusive).
    '''
    live_prg = first.get_live_pregnancies(prg)
    firsts, subsequents = first.split_one_vs_all(live_prg, birthord=1)
    first_prglen = [record.prglength for record in firsts]
    subsequent_prglen = [record.prglength for record in subsequents]

    p1 = pmf.MakePmfFromList(first_prglen)
    p2 = pmf.MakePmfFromList(subsequent_prglen)

    # range() works on both Python 2 and 3 (xrange is Python 2 only), and
    # explicit parallel lists give ax.bar real sequences in a fixed order
    # instead of dict views.
    weeks = list(range(week_lo, week_hi))
    differences = [100 * (p1.Prob(week) - p2.Prob(week)) for week in weeks]

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.bar(weeks, differences, width=1., color='blue')
    ax.set_title('Percent differences in probabilities of gestation periods')
    ax.set_xlabel('Gestation period(weeks)')
    ax.set_ylabel('100(Pfirst - Psubsequent)')
    plt.show()
def main():
    '''Print early / on-time / late birth probabilities and relative risks.

    Builds one PMF of 'prglength' for first babies, one for subsequent
    babies and one for all live births, prints the probability (as a
    percentage) that each group is born early, on time or late, and then
    prints the relative risks returned by get_relative_risks for first
    babies versus the others.
    '''
    dataset = first.get_pregnancies_dataset()
    live = first.get_live_pregnancies(dataset)
    first_born, later_born = first.split_one_vs_all(live, birthord=1)

    pmf_firsts = Pmf.MakePmfFromList(rec.prglength for rec in first_born)
    pmf_others = Pmf.MakePmfFromList(rec.prglength for rec in later_born)
    pmf_all = Pmf.MakePmfFromList(rec.prglength for rec in live)

    birth_types = ('first babies', 'subsequent babies', 'all live births')
    prob_types = ('early', 'on time', 'late')
    timing_funcs = (prob_early, prob_on_time, prob_late)
    group_pmfs = (pmf_firsts, pmf_others, pmf_all)

    probability_line = 'The probability of {} being born {} is {} %'
    # Pair every group label with its PMF, and every timing label with the
    # function that evaluates it.
    for birth_type, group_pmf in zip(birth_types, group_pmfs):
        for prob_type, timing_func in zip(prob_types, timing_funcs):
            percent = timing_func(group_pmf) * 100
            print(probability_line.format(birth_type, prob_type, percent))

    risks = get_relative_risks(pmf_firsts, pmf_others)
    print()
    risk_line = 'The relative risk of first babies to others arriving {} is {}'
    for prob_type in prob_types:
        print(risk_line.format(prob_type, risks[prob_type]))
def bar_chart(pregnancies):
    '''Show side-by-side histograms of gestation length by birth order.

    Live pregnancies are split into first and subsequent babies, a histogram
    of 'prglength' is built for each group, and both are drawn as bars on
    one set of axes.

    Args:
        pregnancies: The pregnancies dataset; it is handed to
            first.get_live_pregnancies.
    '''
    live = first.get_live_pregnancies(pregnancies)
    first_born, later_born = first.split_one_vs_all(live, birthord=1)

    first_hist = pmf.MakeHistFromList(
        first.get_values_for_field(first_born, 'prglength'))
    later_hist = pmf.MakeHistFromList(
        first.get_values_for_field(later_born, 'prglength'))

    axes = plt.figure().add_subplot(111)
    bar_width = 0.35

    first_x, first_y = first_hist.Render()
    later_x, later_y = later_hist.Render()

    first_bars = axes.bar(first_x, first_y, bar_width, color='blue')
    # Shift the second series right by one bar width so the two groups sit
    # next to each other rather than on top of each other.
    later_bars = axes.bar(np.array(later_x) + bar_width, later_y, bar_width,
                          color='grey')

    axes.set_xlabel('Gestation period(weeks)')
    axes.set_ylabel('Frequency')
    axes.set_title('Comparison of the gestation periods of first and subsequent'
                   ' babies')
    axes.legend((first_bars[0], later_bars[0]),
                ('First Babies', 'Subsequent Babies'))
    plt.show()
def plot_pregnancy_pmf(prg):
    '''Show side-by-side PMFs of gestation length by birth order.

    Live pregnancies are split into first and subsequent babies, a PMF of
    'prglength' is built for each group, and both are drawn as bars on one
    set of axes.

    Args:
        prg: The pregnancies dataset; it is handed to
            first.get_live_pregnancies.
    '''
    live = first.get_live_pregnancies(prg)
    first_born, later_born = first.split_one_vs_all(live, birthord=1)

    first_lengths = [rec.prglength for rec in first_born]
    later_lengths = [rec.prglength for rec in later_born]

    axes = plt.figure().add_subplot(111)
    bar_width = 0.35

    first_pmf = pmf.MakePmfFromList(first_lengths)
    later_pmf = pmf.MakePmfFromList(later_lengths)
    first_x, first_y = first_pmf.Render()
    later_x, later_y = later_pmf.Render()

    first_bars = axes.bar(first_x, first_y, bar_width, color='blue')
    # Shift the second series right by one bar width so the two groups sit
    # next to each other rather than on top of each other.
    later_bars = axes.bar(np.array(later_x) + bar_width, later_y, bar_width,
                          color='grey')

    axes.set_xlabel('Gestation period(weeks)')
    axes.set_ylabel('Probability')
    axes.set_title('Comparison of the gestation periods of first and subsequent'
                   ' babies')
    axes.legend((first_bars[0], later_bars[0]),
                ('First Babies', 'Subsequent Babies'))
    plt.show()
it has not happened before it. Args: pmf(Pmf.Pmf): The PMF of pregnancies. ''' my_pmf = pmf.Copy() for value in my_pmf.Values(): if value < week: my_pmf.Remove(value) my_pmf.Normalize() return my_pmf.Prob(week) if __name__ == '__main__': live_pregnancies = get_live_pregnancies(get_pregnancies_dataset()) firsts, others = split_one_vs_all(live_pregnancies, birthord=1) first_durations = [record.prglength for record in firsts] others_durations = [record.prglength for record in others] first_pmf = Pmf.MakePmfFromList(first_durations) others_pmf = Pmf.MakePmfFromList(others_durations) fig = plt.figure() sp = fig.add_subplot(111) # weeks1, weeks2 = (range(min(first_durations), max(first_durations)), # range(min(others_durations), max(others_durations))) weeks1, weeks2 = map(lambda t: range(*t), ((36, 45), (36, 45))) probs_first = [probability_of_birth_in_week(first_pmf, week) for week in weeks1] probs_others = [probability_of_birth_in_week(others_pmf, week) for week in weeks2] r1 = sp.plot(weeks1, probs_first, color='b') r2 = sp.plot(weeks2, probs_others, color='r')