def print_critics_users_diff(users_scores, critics_scores):
    """Report how user and critic review scores differ, then chart the means.

    users_scores: collection of user scores; must provide .mean()
    critics_scores: collection of critic scores; must provide .mean()

    Prints both means, their absolute and relative difference, and the
    effect size returned by ts2.CohenEffectSize, then shows a two-bar
    chart of the means with the y-axis limited to the 60-90 range.
    """
    two_decimals = "%.2f"
    user_avg = users_scores.mean()
    critic_avg = critics_scores.mean()

    print("Mean CRITIC score:", two_decimals % critic_avg)
    print("Mean USER score:", two_decimals % user_avg)

    gap = user_avg - critic_avg
    print("\nUSER mean - CRITIC mean:", two_decimals % gap)

    # Relative difference, expressed as a percentage of the critic mean.
    percent_gap = gap * 100 / critic_avg
    print("Users rated albums", two_decimals % percent_gap,
          "percent higher than critics did.")

    effect = ts2.CohenEffectSize(users_scores, critics_scores)
    print("\nEffect size (Cohen's D) of", two_decimals % effect)

    # Bar chart: one bar per group, labelled on the x-axis.
    labels = ("Users", "Critics")
    positions = np.arange(len(labels))
    plt.bar(positions, [user_avg, critic_avg], align='center', alpha=0.5)
    plt.xticks(positions, labels)
    plt.ylabel('Mean score')
    plt.title('Mean album review score (out of 100)')
    plt.ylim(60, 90)
    plt.show()
def main(script):
    """Run the module's self-checks and print a weight comparison.

    script: string script name, echoed in the final status line
    """
    live, firsts, others = first.MakeFrames()
    length_hist = thinkstats2.Hist(live.prglngth)

    # Mode: the check expects a pregnancy length of 39.
    most_common = Mode(length_hist)
    print('Mode of preg length', most_common)
    assert most_common == 39, most_common

    # AllModes: the check expects the first entry's frequency to be 4693.
    ranked = AllModes(length_hist)
    assert ranked[0][1] == 4693, ranked[0][1]

    for length, count in ranked[:5]:
        print(length, count)

    # Mean total weight of first babies versus the others.
    firsts_avg = firsts.totalwgt_lb.mean()
    others_avg = others.totalwgt_lb.mean()
    summary = "firsts = {} pounds, others = {} pounds, dif = {} pounds ".format(
        firsts_avg, others_avg, firsts_avg - others_avg)
    print(summary)

    effect = thinkstats2.CohenEffectSize(firsts.totalwgt_lb,
                                         others.totalwgt_lb)
    print(effect)

    print('%s: All tests passed.' % script)
def Summarize(live, firsts, others):
    """Print summary statistics of pregnancy length.

    live: group reported under the 'Live' labels (reads .prglngth)
    firsts: group reported as 'First babies' (reads .prglngth)
    others: group reported as 'Others' (reads .prglngth)
    """
    overall = (
        ('Live mean', live.prglngth.mean()),
        ('Live variance', live.prglngth.var()),
        ('Live std', live.prglngth.std()),
    )
    for label, value in overall:
        print(label, value)

    first_mean, other_mean = firsts.prglngth.mean(), others.prglngth.mean()
    first_var, other_var = firsts.prglngth.var(), others.prglngth.var()

    print('Mean')
    print('First babies', first_mean)
    print('Others', other_mean)

    print('Variance')
    print('First babies', first_var)
    print('Others', other_var)

    # The same gap in weeks, in hours, and as a percentage of 39 weeks.
    gap = first_mean - other_mean
    print('Difference in weeks', gap)
    print('Difference in hours', gap * 7 * 24)
    print('Difference relative to 39 weeks', gap / 39 * 100)

    effect = thinkstats2.CohenEffectSize(firsts.prglngth, others.prglngth)
    print('Cohen d', effect)
def WeightDifference(live, firsts, others):
    """Print how total birth weight differs between first babies and others.

    live: DataFrame of all live births
    firsts: DataFrame of first babies
    others: DataFrame of others
    """
    overall_mean = live.totalwgt_lb.mean()
    first_mean, other_mean = (firsts.totalwgt_lb.mean(),
                              others.totalwgt_lb.mean())
    first_var, other_var = (firsts.totalwgt_lb.var(),
                            others.totalwgt_lb.var())

    print('Mean')
    print('First babies', first_mean)
    print('Others', other_mean)

    print('Variance')
    print('First babies', first_var)
    print('Others', other_var)

    # The same gap in pounds, in ounces, and relative to the overall mean.
    gap = first_mean - other_mean
    print('Difference in lbs', gap)
    print('Difference in oz', gap * 16)
    print('Difference relative to mean (%age points)',
          gap / overall_mean * 100)

    effect = thinkstats2.CohenEffectSize(firsts.totalwgt_lb,
                                         others.totalwgt_lb)
    print('Cohen d', effect)
def WeightDifference(live, firsts, others):
    """Print and return the effect size of the total-weight difference.

    live: not used by this function
    firsts: group whose .totalwgt_lb is compared (first argument)
    others: group whose .totalwgt_lb is compared (second argument)

    returns: the value produced by thinkstats2.CohenEffectSize
    """
    # Gap between the two group means, in pounds.
    first_avg = firsts.totalwgt_lb.mean()
    other_avg = others.totalwgt_lb.mean()
    print('Difference in lbs', first_avg - other_avg)

    # Original author's note: firsts are lighter than others.
    effect = thinkstats2.CohenEffectSize(firsts.totalwgt_lb,
                                         others.totalwgt_lb)
    print('Cohen d', effect)
    return effect
def wt_diff(live, firsts, others):
    """Print summary statistics comparing total weight of two groups.

    live: group whose mean total weight is the baseline for the
        relative difference (reads .totalwgt_lb)
    firsts: first group being compared (reads .totalwgt_lb)
    others: second group being compared (reads .totalwgt_lb)

    Prints the group means, the group variances, the difference of the
    means in pounds, that difference as a percentage of the baseline
    mean, and the effect size from thinkstats2.CohenEffectSize.
    """
    mean0 = live.totalwgt_lb.mean()
    mean1 = firsts.totalwgt_lb.mean()
    mean2 = others.totalwgt_lb.mean()
    var1 = firsts.totalwgt_lb.var()
    var2 = others.totalwgt_lb.var()

    # The "mean: " line used to print the variances, so the means were
    # never shown. Each statistic now appears under its own label.
    print("mean: ", mean1, mean2)
    print("var: ", var1, var2)

    print("diff_lb: ", mean1 - mean2)
    print("diff_rel_mean: ", (mean1 - mean2) / mean0 * 100)

    d = thinkstats2.CohenEffectSize(firsts.totalwgt_lb, others.totalwgt_lb)
    print("cohen's d: ", d)
def WeightComparison():
    """Print the effect size of total weight, first babies versus others.

    Reads the pregnancy data with nsfg.ReadFemPreg, keeps rows whose
    outcome equals 1, and splits them on whether birthord equals 1.
    """
    pregnancies = nsfg.ReadFemPreg()
    live_births = pregnancies[pregnancies.outcome == 1]

    first_born = live_births[live_births.birthord == 1]
    later_born = live_births[live_births.birthord != 1]

    effect = thinkstats2.CohenEffectSize(first_born.totalwgt_lb,
                                         later_born.totalwgt_lb)
    print(effect)
    print("Still a really small effect size, though not as small as difference in pregnancy length")
def WeightDifferences(first, other, live):
    """Print the mean total weights and return the effect size.

    first: first group being compared (reads .totalwgt_lb)
    other: second group being compared (reads .totalwgt_lb)
    live: group whose mean scales the printed percentage difference

    returns: thinkstats2.CohenEffectSize of first versus other
    """
    print("Means:")

    avg_first = first.totalwgt_lb.mean()
    print("First mean:", avg_first)

    avg_other = other.totalwgt_lb.mean()
    print("Other mean:", avg_other)

    avg_live = live.totalwgt_lb.mean()
    print("Live mean:", avg_live)

    # Absolute gap between the group means, as a percentage of the live mean.
    relative_gap = (abs(avg_first - avg_other) / avg_live)*100
    print("Means differences:", relative_gap)

    return thinkstats2.CohenEffectSize(first.totalwgt_lb, other.totalwgt_lb)
def WeightDiff(firsts, others):
    """Print mean, variance, mean difference and effect size of total weight.

    firsts: group reported as 'First babies' (reads .totalwgt_lb)
    others: group reported as 'Others' (reads .totalwgt_lb)
    """
    first_mean, other_mean = (firsts.totalwgt_lb.mean(),
                              others.totalwgt_lb.mean())
    first_var, other_var = (firsts.totalwgt_lb.var(),
                            others.totalwgt_lb.var())

    print('Mean')
    print('First babies ', first_mean)
    print('Others ', other_mean)

    print('Variance')
    print('First babies ', first_var)
    print('Others ', other_var)

    print('Difference mean that First babies and others ',
          first_mean - other_mean)

    effect = thinkstats2.CohenEffectSize(firsts.totalwgt_lb,
                                         others.totalwgt_lb)
    print('Cohen d ', effect)
def Summarize(greq, less):
    """Print summary statistics of pregnancy length for two groups.

    greq: group reported under the '>= 30' labels (reads .prglngth)
    less: group reported under the '< 30' labels (reads .prglngth)

    Prints each group's mean and variance, the absolute difference of
    the means, and the effect size from thinkstats2.CohenEffectSize.
    """
    gMean = greq.prglngth.mean()
    gVar = greq.prglngth.var()

    # Python 2 print statements are a syntax error on Python 3. Each line
    # is built as one string and passed to print() as a single
    # parenthesised argument, which gives the same "label value" text on
    # both versions. The one-element tuples keep % formatting safe even
    # if a value is itself a tuple.
    print('>= 30 mean: %s' % (gMean,))
    print('>= 30 variance: %s' % (gVar,))

    lMean = less.prglngth.mean()
    lVar = less.prglngth.var()
    print('< 30 mean: %s' % (lMean,))
    print('< 30 variance: %s' % (lVar,))

    print('Difference between means: %s' % (abs(gMean - lMean),))

    d = thinkstats2.CohenEffectSize(greq.prglngth, less.prglngth)
    print('Cohen d: %s' % (d,))
def main(script):
    """Print a weight comparison and run the module's self-checks.

    script: string script name, echoed in the final status line
    """
    live, firsts, others = first.MakeFrames()
    length_hist = thinkstats2.Hist(live.prglngth)

    # Total-weight statistics for first babies versus the others.
    first_mean, other_mean = (firsts.totalwgt_lb.mean(),
                              others.totalwgt_lb.mean())
    first_var, other_var = (firsts.totalwgt_lb.var(),
                            others.totalwgt_lb.var())

    print('Mean Weight')
    print('First babies', first_mean)
    print('Others babies', other_mean)

    print('Variance in Weight')
    print('First babies', first_var)
    print('Others babies', other_var)

    print('Difference in lbs', first_mean - other_mean)

    effect = thinkstats2.CohenEffectSize(firsts.totalwgt_lb,
                                         others.totalwgt_lb)
    print('Cohen d', effect)

    # Mode: the check expects a pregnancy length of 39.
    most_common = Mode(length_hist)
    print('Mode of preg length', most_common)
    assert most_common == 39, most_common

    # AllModes: the check expects the first entry's frequency to be 4693.
    ranked = AllModes(length_hist)
    assert ranked[0][1] == 4693, ranked[0][1]

    for length, count in ranked[:5]:
        print(length, count)

    print('%s: All tests passed.' % script)
def CompareWeight(live, firsts, others):
    """Print mean, variance and effect-size comparisons of total weight.

    live: group whose mean scales the percentage difference
    firsts: group reported as 'Firsts' (reads .totalwgt_lb)
    others: group reported as 'Others' (reads .totalwgt_lb)
    """
    overall_avg = live.totalwgt_lb.mean()
    first_avg = firsts.totalwgt_lb.mean()
    other_avg = others.totalwgt_lb.mean()

    print('Mean')
    print('Firsts', first_avg)
    print('Others', other_avg)
    print('Difference in lbs', first_avg-other_avg)
    # The same gap as a percentage of the overall mean.
    print('Difference in %', (first_avg-other_avg) / overall_avg * 100)

    first_var, other_var = (firsts.totalwgt_lb.var(),
                            others.totalwgt_lb.var())
    print('Variance')
    print('Firsts', first_var)
    print('Others', other_var)

    effect = thinkstats2.CohenEffectSize(firsts.totalwgt_lb,
                                         others.totalwgt_lb)
    print('Cohen effect', effect)
def light_heavy(firsts, others):
    """Print both mean total weights, then their effect size.

    firsts: first group being compared (reads .totalwgt_lb)
    others: second group being compared (reads .totalwgt_lb)

    returns: always the empty string
    """
    first_avg = firsts.totalwgt_lb.mean()
    other_avg = others.totalwgt_lb.mean()
    print(first_avg, other_avg)

    effect = thinkstats2.CohenEffectSize(firsts.totalwgt_lb,
                                         others.totalwgt_lb)
    print(effect)

    return ""
import math
import thinkstats2
import nsfg

# Load the pregnancy records, keep rows with outcome == 1, and split
# them on whether birthord equals 1.
preg = nsfg.ReadFemPreg()
live = preg[preg.outcome == 1]
firsts = live[live.birthord == 1]
others = live[live.birthord != 1]

# Total-weight statistics per group, all computed before anything is printed.
mean_f, mean_o = firsts.totalwgt_lb.mean(), others.totalwgt_lb.mean()
var_f, var_o = firsts.totalwgt_lb.var(), others.totalwgt_lb.var()
d = thinkstats2.CohenEffectSize(firsts.totalwgt_lb, others.totalwgt_lb)

print('First babies average weight', mean_f)
print('Other babies average weight', mean_o)

# The gap in ounces (16 per pound) and as a percentage of the others' mean.
print('Difference in weight between first and other babies, in oz:',
      (mean_f - mean_o) * 16)
print('Difference in weight between first and other babies, % :',
      (mean_f - mean_o) * 100 / mean_o)

print('First babies variance', var_f)
print('Other babies variance', var_o)
print('Cohen d for weight', d)

# Row counts of each group.
n_f, n_o = len(firsts), len(others)
self.n, self.m = len(group1), len(group2) self.pool = np.hstack((group1, group2)) def RunModel(self): np.random.shuffle(self.pool) data = self.pool[:self.n], self.pool[self.n:] return data male, female = MakeFrames() alcwknd = male.alcwknd, female.alcwknd MakeHists(male, female) MakePmfs(male, female) MakeStep(male, female) MakeCdfs(male, female) ht = DiffMeansPermute(alcwknd) pvalue = ht.PValue() ht.PlotCdf() thinkplot.Config(xlabel='Difference in Means', ylabel='CDF', title='Weekend Alcohol Consumption') thinkplot.Show() print 'Weekend Alcohol Consumption:\nMen:\nMean:', male.alcwknd.mean( ), '\nVariance:', male.alcwknd.var() print '\nWomen:\nMean:', female.alcwknd.mean( ), '\nVariance:', female.alcwknd.var() print '\nDifference in means:', abs(male.alcwknd.mean() - female.alcwknd.mean()) print '\nCohen d:', thinkstats2.CohenEffectSize(male.alcwknd, female.alcwknd) print '\np-value:', pvalue
print('Others', var2) print('Difference in lbs', mean1 - mean2) print('Difference in oz', (mean1 - mean2) * 16) print('Difference relative to mean (%age points)', (mean1 - mean2) / mean0 * 100) d = thinkstats2.CohenEffectSize(firsts.totalwgt_lb, others.totalwgt_lb) print('Cohen d', d) # In[319]: d = thinkstats2.CohenEffectSize(firsts.totalwgt_lb, others.totalwgt_lb) print('Cohen d', d) # In[325]: mean0 = live.totalwgt_lb.mean() mean1 = firsts.totalwgt_lb.mean() mean2 = others.totalwgt_lb.mean() print('Mean') print('First babies', mean1) print('Others', mean2)
group2 = np.random.choice(self.pool, self.m, replace=True) data = group1, group2 return data preg = nsfg.ReadFemPreg() live = preg[preg["outcome"] == 1] resp = nsfg.ReadFemResp() bs = brfss.ReadBrfss() income = hinc.ReadData() log_intp_income = hinc2.InterpolateSample(income, log_upper=6.0) # Q1. Think Stats Chapter 2 Exercise 4 (effect size of Cohen's d) first_wt = live.loc[preg["birthord"] == 1, "totalwgt_lb"] other_wt = live.loc[preg["birthord"] != 1, "totalwgt_lb"] ts.CohenEffectSize(first_wt, other_wt) # Q2. Think Stats Chapter 3 Exercise 1 (actual vs. biased) d = np.diff(np.unique(resp["numkdhh"])).min() left_of_first_bin = resp["numkdhh"].min() - float(d) / 2 right_of_last_bin = resp["numkdhh"].max() + float(d) / 2 plt.clf() plt.hist(resp["numkdhh"], bins=np.arange(left_of_first_bin, right_of_last_bin + d, d), histtype="step", normed=True, label="Actual") plt.hist(resp["numkdhh"], bins=np.arange(left_of_first_bin, right_of_last_bin + d, d), histtype="step", weights=resp["numkdhh"],
def PregnancyLengthDifferences(first, other):
    """Return the effect size of pregnancy length between two groups.

    first: first group being compared (reads .prglngth)
    other: second group being compared (reads .prglngth)

    returns: the value produced by thinkstats2.CohenEffectSize
    """
    first_lengths = first.prglngth
    other_lengths = other.prglngth
    effect = thinkstats2.CohenEffectSize(first_lengths, other_lengths)
    return effect
new_pmf[val] *= val new_pmf.Normalize() return new_pmf preg = nsfg.ReadFemPreg() resp = nsfg.ReadFemResp() live, firsts, others = first.MakeFrames() #--- Chapter2 Ex4 wgt_live = live.totalwgt_lb.dropna() wgt_first = firsts.totalwgt_lb.dropna() wgt_other = others.totalwgt_lb.dropna() mean_diff = 100 * (wgt_first.mean() - wgt_other.mean()) / wgt_live.mean() wgt_cohend = thinkstats2.CohenEffectSize(wgt_first, wgt_other) plen_cohend = thinkstats2.CohenEffectSize(firsts.prglngth, others.prglngth) print('Difference in relative mean:', mean_diff) print('Cohen\'s d for total weight in lbs:', wgt_cohend) print('Cohen\'s d for pregnancy length in weeks:', plen_cohend) #--- Chapter3 Ex1 actual_pmf = thinkstats2.Pmf(resp.numkdhh, label='actual') biased_pmf = BiasPmf(actual_pmf, label='biased') thinkplot.PrePlot(2) actual_hist = thinkplot.Pmf(actual_pmf) biased_hist = thinkplot.Pmf(biased_pmf) thinkplot.Show(xlabel='#kids in household', ylabel='PMF') print('Actual Mean:', actual_pmf.Mean()) print('Biased Mean:', biased_pmf.Mean())
def preg_length_comparison(live, firsts, others):
    """Print the effect size of pregnancy length between two groups.

    live: not used by this function
    firsts: first group being compared (reads the 'prglngth' column)
    others: second group being compared (reads the 'prglngth' column)

    returns: None (the result is only printed)
    """
    # Delegate the computation to the existing thinkstats2 helper.
    effect = thinkstats2.CohenEffectSize(firsts['prglngth'],
                                         others['prglngth'])
    print('Preg Length Cohen Effect Size:', effect)
    return None
def WeightDifference(firsts, others):
    """Print the mean total-weight difference, then the effect size.

    firsts: first group being compared (reads .totalwgt_lb)
    others: second group being compared (reads .totalwgt_lb)
    """
    mean_gap = firsts.totalwgt_lb.mean() - others.totalwgt_lb.mean()
    print(mean_gap)

    effect = thinkstats2.CohenEffectSize(firsts.totalwgt_lb,
                                         others.totalwgt_lb)
    print(effect)
def weight_comparison(live, firsts, others):
    """Print the effect size of total weight between two groups.

    live: not used by this function
    firsts: first group being compared (reads the 'totalwgt_lb' column)
    others: second group being compared (reads the 'totalwgt_lb' column)

    returns: None (the result is only printed)
    """
    # Delegate the computation to the existing thinkstats2 helper.
    effect = thinkstats2.CohenEffectSize(firsts['totalwgt_lb'],
                                         others['totalwgt_lb'])
    print('Weight Cohen Effect Size:', effect)
    return None
def COHEN(Frame1, Frame2):
    """Return the difference of means divided by a pooled standard deviation.

    Frame1, Frame2: collections providing .mean() and .std()

    The pooled standard deviation is the square root of the plain
    (unweighted) average of the two squared standard deviations, so
    group sizes are not taken into account.
    """
    mean_gap = Frame1.mean() - Frame2.mean()
    pooled_sd = ((Frame1.std()**2 + Frame2.std()**2) / 2)**.5
    return mean_gap / pooled_sd


import thinkstats2, first

# Load the three frames via the helper in the `first` module
# (original author's note: taken from the book's solutions).
live, firsts, others = first.MakeFrames()

TotalW = live.totalwgt_lb
FirstW = firsts.totalwgt_lb
OtherW = others.totalwgt_lb

# Effect size from the local helper and from the library implementation.
MyCohen = COHEN(FirstW, OtherW)
NotMyCohen = thinkstats2.CohenEffectSize(FirstW, OtherW)

FirstPMean = firsts.prglngth.mean()
OtherPMean = others.prglngth.mean()

print("the mean weight of first babies is " + str(FirstW.mean()) + " lbs")
print("the mean weight of other babies is " + str(OtherW.mean()) + " lbs")
print("the Cohen's d between the two sets is " + str(MyCohen))
print("the mean pregnancy length of first babies was " + str(FirstPMean)
      + " weeks")
print("the mean pregnancy length of other babies was " + str(OtherPMean)
      + " weeks")
print("the difference between the pregnancy lengths was "
      + str(FirstPMean - OtherPMean))