# Baseline check: compare ESPN's regular-season eFG% (df_espn['AFG%'])
# with the epoch-5 eFG% held in team_report['team_efg_e5'].
#
# Each column is sorted by index once and the sorted series are reused for
# the printout, the difference, the scatter plot and the correlation.
# NOTE(review): assumes both series carry the same index labels, so that
# sorted order pairs each entry with its counterpart -- confirm.
season_efg_sorted = df_espn['AFG%'].sort_index()
epoch5_efg_sorted = team_report['team_efg_e5'].sort_index()

# print() is always called with a single argument so the output is the
# same under Python 2 (where the parentheses are just grouping) and
# Python 3 (where print is a function).
print(season_efg_sorted)
print(epoch5_efg_sorted)

x = epoch5_efg_sorted - season_efg_sorted
# "%s %s" reproduces the label, a separating space, then the value --
# exactly what the two-item print statement used to emit.
print("%s %s" % ("mean diff between whole season stats and epoch 5: ", x.mean()))
# mean at 0.0100486358233.
# So basically, epoch 5 stats are the same as entire season. Valid baseline.

# -----------------------
# epoch 5 vs espn season stats
tfo_extra.plot_scatter_with_reg_overlay(
    season_efg_sorted, epoch5_efg_sorted, figurenum=1002, overlay=True
)
plt.xlabel('regular season eFG%')
plt.ylabel('eFG% in epoch 5')

# Computed once; the same result is printed here and again, labelled, below.
season_vs_epoch5_r = pearsonr(season_efg_sorted, epoch5_efg_sorted)
print(season_vs_epoch5_r)
# results: (0.81549272252163574, 4.0378399944744899e-08)

# ------------
# Ok -- do some code // team_efg_e4 defined in alternate file (tfo_team_report)
print("season avg vs epoch 5: ")
print(season_vs_epoch5_r)
bigdf[filter_newepoch].groupby("Tm")["points"].sum() * 0.5 / bigdf[filter_newepoch].groupby("Tm")["points"].count() ) # generate second comparison point. To compare with team_efg_diff team_efg_diff2 = team_efg_econtrol - team_report["team_efg_e5"] # mini table of the two diffs x = pd.DataFrame(data={"team_efg_diff": team_report["team_efg_diff"], "team_efg_diff2": team_efg_diff2}) x.mean() # -- plot comparisons # Using plot function from tfo_extra.py tfo_extra.plot_scatter_with_reg_overlay( team_report["team_efg_e5"], team_report["team_efg_diff"], figurenum=1010, overlay=True ) plt.xlabel("Team efg e5") plt.ylabel("Diff efg e3, e5") tfo_extra.plot_scatter_with_reg_overlay(team_report["team_efg_e5"], team_efg_diff2, figurenum=1011, overlay=True) # plt.xlabel("Team efg e5") plt.ylabel("Diff efg e3, econtrol") # crank out r values // # one correlates, one doenst. Seems valid. # Last check: use econtrol in place of e5