import sys

# The project directory has to be on sys.path *before* the project-local
# imports below are executed; extending the path afterwards (as this script
# used to) cannot help those imports resolve.
sys.path.extend(['/home/simon/Documents/601-Project/code'])

import pandas as pd
from data.pitchfx import PitchFxDataset
from tables.utils import add_header, change_fontsize, add_divider

pitchfx = PitchFxDataset()

# Handle on the dataset's `pitchfx` attribute; the binned columns computed
# below are written to it.
pitches = pitchfx.pitchfx

# ---------------- compute experiments --------------------

# Derived categorical columns, one row per column:
#   (new column, source column, bin edges, bin labels)
# `include_lowest=True` closes the first interval on the left, so a value equal
# to the lowest edge falls into the first bin.
for target, source, edges, names in (
        # count
        ("ball_bin", "b_count", [0, 2, 3], ["[0,2]", "{3}"]),
        ("strike_bin", "s_count", [0, 1, 2], ["[0,1]", "{2}"]),
        # movement: values in [-60, 0] are labelled "Inward", (0, 60] "Outward"
        ("move_horiz", "pfx_x_std", [-60, 0, 60], ["Inward", "Outward"]),
):
    pitches[target] = pd.cut(x=pitches[source],
                             bins=edges,
                             labels=names,
                             include_lowest=True)
pitches["move_vert"] = pd.cut(x=pitches["pfx_z"],
                              bins=[-20, 5, 20],
                              labels=["Upward", "Downward"],
Пример #2
0
import sys
sys.path.extend(['/home/simon/Documents/601-Project/code'])
from data.pitchfx import PitchFxDataset
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

pitchfx = PitchFxDataset()

# Cross-tabulate the `type` column against `type_from_sz`. The table is
# printed because a bare expression's value is silently discarded when this
# file is run as a script (it is only echoed in an interactive session).
print(pd.crosstab(pitchfx.pitchfx["type"], pitchfx.pitchfx["type_from_sz"]))

# Group the pitches by every level of umpire_HP and stand.
df = pitchfx.group_by(
    umpire_HP="all",
    stand="all",
)

# to iterate through all: each item is a (levels, sub-frame) pair
for levels, d in df:
    print(len(d), levels)

# The two plots get their own figures: drawn onto the same axes, the histogram
# (x = pz_std - pz, y = counts) and the scatter (x = pz, y = pz_std) would share
# one scale and hide each other.
plt.figure()
plt.scatter(pitchfx.pitchfx["pz"][:1000], pitchfx.pitchfx["pz_std"][:1000])
plt.figure()
plt.hist(pitchfx.pitchfx["pz_std"] - pitchfx.pitchfx["pz"])
plt.show()

# NOTE(review): `vals` and `N` are assigned before this excerpt begins, and the
# name `matplotlib` must be bound by an import that is not visible here.
# Column 2 of the first N rows gets N evenly spaced values from 2/256 to 76/256.
vals[:N, 2] = np.linspace(2 / 256, 76 / 256, N)
# Every row from index N onward is overwritten with a copy of row N - 1.
vals[N:, :] = vals[N - 1, :]
# Build a colormap from the table (presumably one colour per row -- TODO confirm).
newcmp16 = matplotlib.colors.ListedColormap(vals)
# Masked / invalid values are drawn in white.
newcmp16.set_bad(color='white')

# Console display limits for pandas: at most 100 rows and 20 columns,
# with a total display width of 1000 characters.
pd.set_option(
    'display.max_rows', 100,
    'display.max_columns', 20,
    'display.width', 1000,
)

# ---------------- SETUP ------------------

# The bare "seaborn" style sheet was renamed "seaborn-v0_8" in matplotlib 3.6
# and the old name was removed in 3.8, where it raises OSError. Use the new
# name when this installation provides it, otherwise fall back to the old one.
plt.style.use(
    "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn")
sys.path.extend(['/home/simon/Documents/601-Project/code'])
# Pickle file that is opened and unpickled further down in this script.
encoder_path = "./data/models/encoding/all_fit.txt"

pitchfx = PitchFxDataset()

# Balls and strike counts: group by umpire, ball-count bin and strike-count bin.
df = pitchfx.group_by(
    umpire_HP="all",
    b_count=[0, 2, 3],
    s_count=[0, 1, 2],
)

# The pickle holds a 5-item sequence; only items 1 (embeddings) and 2 (groups)
# are used here.
# NOTE(review): unpickling executes code from the file -- only load trusted files.
with open(encoder_path, "rb") as f:
    _, embeddings, groups, _, _ = pickle.load(f)

# Positions, within the stored `groups`, of every group key produced by the
# grouping above; keys that are absent from `groups` are skipped.
group_keys = (key for key, _ in df)
ids = [groups.index(key) for key in group_keys if key in groups]

# Restrict both the embedding rows and the group list to those positions.
embeddings = embeddings[ids, :]
groups = [groups[position] for position in ids]

# One row per kept group; the group tuples become ordinary columns via
# reset_index().
group_index = pd.MultiIndex.from_tuples(groups)
df = pd.DataFrame(embeddings, index=group_index).reset_index()
Пример #4
0
import sys

# The project directory has to be on sys.path *before* the project-local
# imports below are executed; extending the path afterwards (as this script
# used to) cannot help those imports resolve.
sys.path.extend(['/home/simon/Documents/601-Project/code'])

import pandas as pd
from data.pitchfx import PitchFxDataset
from tables.utils import add_header, change_fontsize, add_divider

pitchfx = PitchFxDataset()

# Allow very long cell contents (up to 10000 characters) before pandas
# truncates them.
pd.options.display.max_colwidth = 10000

# One row per experiment split: a description plus the number of groups and
# the min / median / max group size.
summary = pd.DataFrame(columns=["split", "count", "min", "med", "max"])
# ---------------- compute experiments --------------------

# NOTE: the split descriptions are raw strings. In a normal literal the "\n"
# at the start of "\newline" is a line feed, which would put a line break
# followed by the stray text "ewline" into the cell instead of the LaTeX
# \newline macro.

# count: group sizes for umpire x ball-count bin x strike-count bin
df = pitchfx.group_by(umpire_HP="all", b_count=[0, 2, 3], s_count=[0, 1, 2])
# Non-null "px" entries per group, i.e. the size of each group.
counts = df.agg("count")["px"]
# Number of groups, then smallest / median / largest group size.
stats = counts.agg(["count", "min", "median", "max"]).to_numpy()
summary.loc[1] = [
    r"Umpire (39),\newline Ball count ([0,2], {3}),\newline Strike count ([0,1], {2})",
    *stats
]
# movement: group sizes for umpire x horizontal-movement bin x vertical-movement bin
df = pitchfx.group_by(umpire_HP="all", pfx_x=[-60, 0, 60], pfx_z=[-20, 5, 20])
counts = df.agg("count")["px"]
stats = counts.agg(["count", "min", "median", "max"]).to_numpy()
summary.loc[2] = [
    r"Umpire (39),\newline Horiz. movement (inward, outward),\newline Vert. movement (upward, downward)",
    *stats
]
# batter/pitcher
Пример #5
0
# NOTE(review): `vals` and `N` are assigned before this excerpt begins, and the
# name `matplotlib` must be bound by an import that is not visible here.
# Column 2 of the first N rows gets N evenly spaced values from 2/256 to 76/256.
vals[:N, 2] = np.linspace(2 / 256, 76 / 256, N)
# Every row from index N onward is overwritten with a copy of row N - 1.
vals[N:, :] = vals[N - 1, :]
# Build a colormap from the table (presumably one colour per row -- TODO confirm).
newcmp16 = matplotlib.colors.ListedColormap(vals)
# Masked / invalid values are drawn in white.
newcmp16.set_bad(color='white')

# Console display limits for pandas: at most 100 rows and 20 columns,
# with a total display width of 1000 characters.
pd.set_option(
    'display.max_rows', 100,
    'display.max_columns', 20,
    'display.width', 1000,
)

# ---------------- SETUP ------------------

# The bare "seaborn" style sheet was renamed "seaborn-v0_8" in matplotlib 3.6
# and the old name was removed in 3.8, where it raises OSError. Use the new
# name when this installation provides it, otherwise fall back to the old one.
plt.style.use(
    "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn")
sys.path.extend(['/home/simon/Documents/601-Project/code'])
# Pickle file that is opened and unpickled further down in this script.
encoder_path = "./data/models/encoding/all_fit.txt"

pitchfx = PitchFxDataset()

# Score difference and inning

# New column: b_score minus p_score for every pitch.
b_score = pitchfx.pitchfx["b_score"]
p_score = pitchfx.pitchfx["p_score"]
pitchfx.pitchfx["score_diff_b_p"] = b_score - p_score

# Group by umpire, score-difference bin and inning bin.
df = pitchfx.group_by(
    umpire_HP="all",
    score_diff_b_p=[-25, -2, 1, 25],
    inning=[1, 6, 18],
)

# The pickle holds a 5-item sequence; only items 1 (embeddings) and 2 (groups)
# are used here.
# NOTE(review): unpickling executes code from the file -- only load trusted files.
with open(encoder_path, "rb") as f:
    _, embeddings, groups, _, _ = pickle.load(f)

# Positions, within the stored `groups`, of every group key produced by the
# grouping above; keys that are absent from `groups` are skipped.
group_keys = (key for key, _ in df)
ids = [groups.index(key) for key in group_keys if key in groups]
Пример #6
0
import pickle
import sys

# `sys` is used below but was not imported in the visible part of this script,
# and the path extension has to run *before* the project-local imports for it
# to have any effect on them.
sys.path.extend(['/home/simon/Documents/601-Project/code'])

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from data.pitchfx import PitchFxDataset
# NOTE(review): not referenced by name here; presumably imported so that pickle
# can resolve the class of the stored object -- confirm before removing.
from models.classification.StrikeZoneLearner import StrikeZoneLearner
from plot.utils import plot_pitches

# The bare "seaborn" style sheet was renamed "seaborn-v0_8" in matplotlib 3.6
# and the old name was removed in 3.8, where it raises OSError. Use the new
# name when this installation provides it, otherwise fall back to the old one.
plt.style.use(
    "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn")

# Load the previously pickled object from disk.
# NOTE(review): unpickling executes code from the file -- only load trusted files.
with open("./data/models/classifiers/umpire_balls_strikes_roc_auc_svc_klr.txt",
          "rb") as f:
    szl = pickle.load(f)

pitchfx = PitchFxDataset()

# NOTE: `df` is rebound by each group_by call in this section, so only the
# last grouping is still bound to `df` once the section has run.

# Grouping 1: umpire x ball-count bin x strike-count bin.
df = pitchfx.group_by(
    umpire_HP="all",
    b_count=[0, 2, 3],
    s_count=[0, 1, 2],
)

# Grouping 2: negate pfx_x on the rows whose `stand` is "L", then group by
# umpire x pfx_x bin x pfx_z bin.
stand_is_left = pitchfx.pitchfx["stand"] == "L"
horizontal = pitchfx.pitchfx["pfx_x"]
pitchfx.pitchfx["pfx_x"] = np.where(stand_is_left, -horizontal, horizontal)
df = pitchfx.group_by(
    umpire_HP="all",
    pfx_x=[-60, 0, 60],
    pfx_z=[-20, 5, 20],
)

# Grouping 3: every level of umpire x p_throws x stand.
df = pitchfx.group_by(umpire_HP="all", p_throws="all", stand="all")

# Grouping 4: add the b_score - p_score difference as a column, then group by
# umpire x score-difference bin x inning bin.
b_score = pitchfx.pitchfx["b_score"]
p_score = pitchfx.pitchfx["p_score"]
pitchfx.pitchfx["score_diff_b_p"] = b_score - p_score
df = pitchfx.group_by(
    umpire_HP="all",
    score_diff_b_p=[-25, -2, 1, 25],
    inning=[1, 6, 18],
)