# Exploratory script: loads the PitchFx dataset, cross-tabulates two label
# columns, prints the size of every umpire/stand group and plots "pz_std"
# against "pz".
import sys

# The project root has to be on the import path before the project imports.
sys.path.append('/home/simon/Documents/601-Project/code')

from data.pitchfx import PitchFxDataset
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

pitchfx = PitchFxDataset()

# Contingency table of "type" against "type_from_sz"; the result is not
# stored, so it is only visible when run in an interactive session.
pd.crosstab(
    index=pitchfx.pitchfx["type"],
    columns=pitchfx.pitchfx["type_from_sz"],
)

df = pitchfx.group_by(umpire_HP="all", stand="all")

# to iterate through all:
for levels, group_df in df:
    print(len(group_df), levels)

# Both plots are drawn onto the same current figure before it is shown.
frame = pitchfx.pitchfx
pz = frame["pz"]
pz_std = frame["pz_std"]
plt.scatter(pz[:1000], pz_std[:1000])  # first 1000 rows only
plt.hist(pz_std - pz)
plt.show()
# Widen the pandas console output so large frames print without truncation.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 20)
pd.set_option('display.width', 1000)

# ---------------- SETUP ------------------
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8*" and
# newer releases no longer accept the plain "seaborn" name, so pick whichever
# of the two names the installed version provides.
plt.style.use(
    "seaborn" if "seaborn" in plt.style.available else "seaborn-v0_8"
)
sys.path.extend(['/home/simon/Documents/601-Project/code'])
encoder_path = "./data/models/encoding/all_fit.txt"

pitchfx = PitchFxDataset()

# Balls and strike counts
df = pitchfx.group_by(umpire_HP="all", b_count=[0, 2, 3], s_count=[0, 1, 2])

# NOTE(review): pickle.load can execute arbitrary code; only point
# encoder_path at files produced by this project.
with open(encoder_path, "rb") as f:
    # The pickled object is a 5-tuple; only the embeddings and their group
    # keys (2nd and 3rd entries) are used here.
    _, embeddings, groups, _, _ = pickle.load(f)

# Keep only the stored groups that also occur in the current split, in the
# order in which the split yields them.
ids = [groups.index(gr) for gr, _ in df if gr in groups]
embeddings = embeddings[ids, :]
groups = [groups[i] for i in ids]

# One row per group: the group key levels become ordinary columns, followed
# by the embedding coordinates c0..c9.
# NOTE(review): assumes 3-level group keys and 10 embedding columns - confirm.
df = pd.DataFrame(
    embeddings, index=pd.MultiIndex.from_tuples(groups)
).reset_index()
df.columns = [
    "umpire", "ball_count", "strike_count",
    *["c" + str(i) for i in range(10)]
]
# Strip the "b_count_" prefix from the ball-count labels.
df["ball_count"] = df["ball_count"].str.replace("b_count_", "")
# Builds a summary table with one row per experiment split, holding the
# number of groups and the min / median / max group size.
import sys

# The path must be extended *before* the project imports below; extending it
# afterwards has no effect on whether those imports can be resolved.
sys.path.extend(['/home/simon/Documents/601-Project/code'])

from data.pitchfx import PitchFxDataset
import pandas as pd
from tables.utils import add_header, change_fontsize, add_divider

pitchfx = PitchFxDataset()
# Avoid truncating the long "split" descriptions when the frame is rendered.
pd.options.display.max_colwidth = 10000
summary = pd.DataFrame(columns=["split", "count", "min", "med", "max"])

# ---------------- compute experiments --------------------
# count
df = pitchfx.group_by(umpire_HP="all", b_count=[0, 2, 3], s_count=[0, 1, 2])
counts = df.agg("count")["px"]  # per-group count of the "px" column
stats = counts.agg(["count", "min", "median", "max"]).to_numpy()
# Raw strings keep the LaTeX "\newline" command intact; in a normal string
# literal "\n" is a line feed, which would leave a stray "ewline" in the text.
summary.loc[1] = [
    r"Umpire (39),\newline Ball count ([0,2], {3}),\newline Strike count ([0,1], {2})",
    *stats
]

# movement
df = pitchfx.group_by(umpire_HP="all", pfx_x=[-60, 0, 60], pfx_z=[-20, 5, 20])
counts = df.agg("count")["px"]
stats = counts.agg(["count", "min", "median", "max"]).to_numpy()
summary.loc[2] = [
    r"Umpire (39),\newline Horiz. movement (inward, outward),\newline Vert. movement (upward, downward)",
    *stats
]

# batter/pitcher
# classifiers
from models.classification.kernel_logistic_regression import KernelLogisticRegression
from sklearn.svm import SVC
from pygam import LogisticGAM, te
from sklearn.ensemble import (
    RandomForestClassifier,
    AdaBoostClassifier,
    GradientBoostingClassifier,
)
from sklearn.neural_network import MLPClassifier
from models.classification.polynomial_logistic_regression import PolynomialLogisticRegression

sys.path.append('/home/simon/Documents/601-Project/code')
out_file = "./data/models/classifiers/umpire_pitchers_batters_auc_roc_svc_klr.txt"

# Split the data by home-plate umpire, pitcher handedness and batter stance.
pitchfx = PitchFxDataset()
df = pitchfx.group_by(umpire_HP="all", p_throws="all", stand="all")

# Learner scored with ROC AUC.
szl = StrikeZoneLearner(scoring="roc_auc")

# add SVC
# Each candidate is an (estimator, parameter grid) pair; the grid is
# 7 log-spaced values each for C and gamma, with balanced class weights.
svc = SVC(probability=True)
svc_params = dict(
    C=np.logspace(-1, 1, 7),
    gamma=np.logspace(-1, 0.3, 7),
    class_weight=["balanced"],
)
classifiers = [(svc, svc_params)]
# ---------------- SETUP ------------------
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8*" and
# newer releases no longer accept the plain "seaborn" name, so pick whichever
# of the two names the installed version provides.
plt.style.use(
    "seaborn" if "seaborn" in plt.style.available else "seaborn-v0_8"
)
sys.path.extend(['/home/simon/Documents/601-Project/code'])
encoder_path = "./data/models/encoding/all_fit.txt"

pitchfx = PitchFxDataset()

# Score difference (b_score - p_score) and inning
pitchfx.pitchfx["score_diff_b_p"] = (
    pitchfx.pitchfx["b_score"] - pitchfx.pitchfx["p_score"]
)
# NOTE(review): the lists look like bin edges for the two variables - confirm
# against PitchFxDataset.group_by.
df = pitchfx.group_by(
    umpire_HP="all", score_diff_b_p=[-25, -2, 1, 25], inning=[1, 6, 18]
)

# NOTE(review): pickle.load can execute arbitrary code; only point
# encoder_path at files produced by this project.
with open(encoder_path, "rb") as f:
    # The pickled object is a 5-tuple; only the embeddings and their group
    # keys (2nd and 3rd entries) are used here.
    _, embeddings, groups, _, _ = pickle.load(f)

# Keep only the stored groups that also occur in the current split, in the
# order in which the split yields them.
ids = [groups.index(gr) for gr, _ in df if gr in groups]
embeddings = embeddings[ids, :]
groups = [groups[i] for i in ids]

# One row per group: the group key levels become ordinary columns, followed
# by the embedding coordinates c0..c9.
# NOTE(review): assumes 3-level group keys and 10 embedding columns - confirm.
df = pd.DataFrame(
    embeddings, index=pd.MultiIndex.from_tuples(groups)
).reset_index()
df.columns = ["umpire", "score", "inning", *["c" + str(i) for i in range(10)]]

# ------------ MANOVA -------------------