Пример #1
0
import sys
sys.path.extend(['/home/simon/Documents/601-Project/code'])
from data.pitchfx import PitchFxDataset
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Configure pandas' console output first so the table printed below is shown
# with these limits rather than the defaults.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 20)
pd.set_option('display.width', 1000)

pitchfx = PitchFxDataset()

# Cross-tabulate the "type" column against "type_from_sz". The value of a bare
# expression is dropped when this file runs as a script, so print it.
print(pd.crosstab(pitchfx.pitchfx["type"], pitchfx.pitchfx["type_from_sz"]))

df = pitchfx.group_by(
    umpire_HP="all",
    stand="all",
)

# to iterate through all:
for levels, d in df:
    print(len(d), levels)

# The scatter plot and the histogram have unrelated axes, so each one gets its
# own figure instead of being drawn on top of the other.
plt.figure()
plt.scatter(pitchfx.pitchfx["pz"][:1000], pitchfx.pitchfx["pz_std"][:1000])

plt.figure()
plt.hist(pitchfx.pitchfx["pz_std"] - pitchfx.pitchfx["pz"])

plt.show()

# ---------------- SETUP ------------------

# pickle is needed to read the encoder output below and is not among the
# imports at the top of this script.
import pickle

# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8*" and
# later releases removed the old names; use whichever one is installed.
plt.style.use(
    "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
)
sys.path.extend(['/home/simon/Documents/601-Project/code'])
encoder_path = "./data/models/encoding/all_fit.txt"

pitchfx = PitchFxDataset()

# Balls and strike counts

# NOTE(review): the lists passed for b_count / s_count are presumably bin
# edges -- confirm against PitchFxDataset.group_by.
df = pitchfx.group_by(umpire_HP="all", b_count=[0, 2, 3], s_count=[0, 1, 2])

# NOTE(review): pickle.load can execute arbitrary code while loading; only
# point encoder_path at files produced by this project.
with open(encoder_path, "rb") as f:
    # Only the 2nd and 3rd entries of the stored 5-tuple are used here.
    _, embeddings, groups, _, _ = pickle.load(f)

# Positions (within the stored groups) of every group produced by the split.
ids = [groups.index(gr) for gr, _ in df if gr in groups]

# Keep only the matching embedding rows and their group labels, same order.
embeddings = embeddings[ids, :]
groups = [groups[i] for i in ids]

# One row per group: the group levels become ordinary columns, followed by
# the embedding components c0..c9.
df = pd.DataFrame(embeddings,
                  index=pd.MultiIndex.from_tuples(groups)).reset_index()
df.columns = [
    "umpire", "ball_count", "strike_count", *["c" + str(i) for i in range(10)]
]
# Drop the "b_count_" text from the ball-count labels.
df["ball_count"] = df["ball_count"].str.replace("b_count_", "")
Пример #3
0
import sys

# The project directory has to be on sys.path *before* the project-local
# imports below are resolved, so extend the path first.
sys.path.extend(['/home/simon/Documents/601-Project/code'])

from data.pitchfx import PitchFxDataset
import pandas as pd
from tables.utils import add_header, change_fontsize, add_divider

pitchfx = PitchFxDataset()

# Do not truncate the long "split" descriptions when the table is rendered.
pd.options.display.max_colwidth = 10000

# One row per experiment: a description of the split plus statistics of the
# per-group counts.
summary = pd.DataFrame(columns=["split", "count", "min", "med", "max"])
# ---------------- compute experiments --------------------

# count
df = pitchfx.group_by(umpire_HP="all", b_count=[0, 2, 3], s_count=[0, 1, 2])
# Non-null "px" entries per group.
counts = df.agg("count")["px"]
# Number of groups, then smallest / median / largest group count.
stats = counts.agg(["count", "min", "median", "max"]).to_numpy()
# Raw strings keep the backslash of the LaTeX "\newline" command; in a normal
# literal "\n" would be turned into a line feed followed by "ewline".
summary.loc[1] = [
    r"Umpire (39),\newline Ball count ([0,2], {3}),\newline Strike count ([0,1], {2})",
    *stats
]
# movement
df = pitchfx.group_by(umpire_HP="all", pfx_x=[-60, 0, 60], pfx_z=[-20, 5, 20])
counts = df.agg("count")["px"]
stats = counts.agg(["count", "min", "median", "max"]).to_numpy()
summary.loc[2] = [
    r"Umpire (39),\newline Horiz. movement (inward, outward),\newline Vert. movement (upward, downward)",
    *stats
]
# batter/pitcher
Пример #4
0
# classifiers
from models.classification.kernel_logistic_regression import KernelLogisticRegression
from sklearn.svm import SVC
from pygam import LogisticGAM, te
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from models.classification.polynomial_logistic_regression import PolynomialLogisticRegression

# Put the project directory on the import path and name the output file.
sys.path.extend(['/home/simon/Documents/601-Project/code'])
out_file = "./data/models/classifiers/umpire_pitchers_batters_auc_roc_svc_klr.txt"

pitchfx = PitchFxDataset()

# Split on umpire_HP, p_throws and stand, each requested with "all".
df = pitchfx.group_by(umpire_HP="all", p_throws="all", stand="all")
szl = StrikeZoneLearner(scoring="roc_auc")

# add SVC
svc = SVC(probability=True)
# Candidate hyper-parameter values: 7 log-spaced values each for C and gamma.
svc_params = dict(
    C=np.logspace(-1, 1, 7),
    gamma=np.logspace(-1, 0.3, 7),
    class_weight=["balanced"],
)

# Collection of (estimator, parameter grid) pairs, starting with the SVC.
classifiers = [(svc, svc_params)]
Пример #5
0
# ---------------- SETUP ------------------

# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8*" and
# later releases removed the old names; use whichever one is installed.
plt.style.use(
    "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
)
sys.path.extend(['/home/simon/Documents/601-Project/code'])
encoder_path = "./data/models/encoding/all_fit.txt"

pitchfx = PitchFxDataset()

# Score difference and inning

# Derived column: "b_score" minus "p_score" for every row.
pitchfx.pitchfx["score_diff_b_p"] = (
    pitchfx.pitchfx["b_score"] - pitchfx.pitchfx["p_score"]
)

# NOTE(review): the lists are presumably bin edges for the two variables --
# confirm against PitchFxDataset.group_by.
df = pitchfx.group_by(umpire_HP="all",
                      score_diff_b_p=[-25, -2, 1, 25],
                      inning=[1, 6, 18])

# NOTE(review): pickle.load can execute arbitrary code while loading; only
# point encoder_path at files produced by this project.
with open(encoder_path, "rb") as f:
    # Only the 2nd and 3rd entries of the stored 5-tuple are used here.
    _, embeddings, groups, _, _ = pickle.load(f)

# Positions (within the stored groups) of every group produced by the split.
ids = [groups.index(gr) for gr, _ in df if gr in groups]

# Keep only the matching embedding rows and their group labels, same order.
embeddings = embeddings[ids, :]
groups = [groups[i] for i in ids]

# One row per group: the group levels become ordinary columns, followed by
# the embedding components c0..c9.
df = pd.DataFrame(embeddings,
                  index=pd.MultiIndex.from_tuples(groups)).reset_index()
df.columns = ["umpire", "score", "inning", *["c" + str(i) for i in range(10)]]

# ------------ MANOVA -------------------