Пример #1
0
# Sort both eFG% series by index once, so that printing and subtracting use
# the same ordering without re-sorting for every use.
espn_efg = df_espn['AFG%'].sort_index()
epoch5_efg = team_report['team_efg_e5'].sort_index()

# print(...) with a single argument produces the same output on Python 2 and
# Python 3, unlike the Python 2-only print statement.
print(espn_efg)
print(epoch5_efg)

# Difference per index label: epoch 5 value minus the whole-season value.
x = epoch5_efg - espn_efg

# The value is formatted into the message (keeping the separator space the
# print statement used to insert) so the line is identical on both versions.
print("mean diff between whole season stats and epoch 5:  %s" % x.mean())

# Recorded result: mean at 0.0100486358233. So basically, epoch 5 stats are
# the same as the entire season. Valid baseline.


# -----------------------

# epoch 5 vs espn season stats

# Sort both series by index once and reuse them for the plot and the
# correlation, instead of re-sorting for every call.
# NOTE(review): the pairing below is positional, so this assumes both objects
# carry the same index labels -- confirm against where they are built.
espn_efg = df_espn['AFG%'].sort_index()
epoch5_efg = team_report['team_efg_e5'].sort_index()

# Scatter plot with regression overlay (helper from tfo_extra), figure 1002.
tfo_extra.plot_scatter_with_reg_overlay(espn_efg, epoch5_efg, figurenum=1002, overlay=True)

plt.xlabel('regular season eFG%')
plt.ylabel('eFG% in epoch 5')

# print(...) with a single argument behaves the same on Python 2 and Python 3.
# pearsonr is presumably scipy.stats.pearsonr (import not visible here).
print(pearsonr(espn_efg, epoch5_efg))
# results: (0.81549272252163574, 4.0378399944744899e-08)

# ------------


# Ok -- do some code // team_efg_e4 defined in alternate file (tfo_team_report)

# Single-argument print(...) calls emit the same text on Python 2 and
# Python 3; the original print statements only parse on Python 2.
print("season avg vs epoch 5: ")
print(pearsonr(df_espn['AFG%'].sort_index(), team_report['team_efg_e5'].sort_index()))
Пример #2
0
    bigdf[filter_newepoch].groupby("Tm")["points"].sum() * 0.5 / bigdf[filter_newepoch].groupby("Tm")["points"].count()
)

# Second comparison point, to be read alongside team_efg_diff:
# team_efg_econtrol minus the team_efg_e5 column of team_report.
team_efg_diff2 = team_efg_econtrol - team_report["team_efg_e5"]

# Mini table holding the two diffs side by side, one column each.
x = pd.DataFrame(
    {
        "team_efg_diff": team_report["team_efg_diff"],
        "team_efg_diff2": team_efg_diff2,
    }
)
# Bare expression: the per-column means are only displayed when this is run
# interactively (REPL / notebook); in a plain script the value is discarded.
x.mean()


# -- plot comparisons
# Both figures use the plot function from tfo_extra.py with team_efg_e5 on the
# x axis; only the plotted diff series, the figure number and the y label
# change between them.
for diff_values, figure_id, y_label in (
    (team_report["team_efg_diff"], 1010, "Diff efg e3, e5"),
    (team_efg_diff2, 1011, "Diff efg e3, econtrol"),
):
    tfo_extra.plot_scatter_with_reg_overlay(
        team_report["team_efg_e5"], diff_values, figurenum=figure_id, overlay=True
    )
    plt.xlabel("Team efg e5")
    plt.ylabel(y_label)


# crank out r values //

# one correlates, one doesn't. Seems valid.

# Last check: use econtrol in place of e5