Exemplo n.º 1
0
def _calculate_normalized_play_pct(positon_players, normalized_data):
    """Write workload-normalised fielding metrics onto each player dict.

    Every value of ``positon_players`` is mutated in place and gains four
    keys, in this order: ``adj_play_pct``, ``adj_fieldedballs``,
    ``adj_totbiz`` and ``adj_zr``.  Nothing is returned.

    Args:
        positon_players: mapping whose values are mutable player dicts
            holding the per-bucket chance / fielded counts, ``totalbiz``,
            ``zr`` and ``fieldingip``.
        normalized_data: dict with one volume per difficulty bucket plus
            ``totbiz``; each player's per-bucket success rate is applied to
            these shared volumes so that all players are rated against the
            same mix of chances.
    """
    # (chances key, fielded key) per difficulty bucket, easiest first.
    buckets = (
        ("routinebiz", "routinebizfield"),
        ("likelybiz", "likelybizfield"),
        ("evenbiz", "evenbizfield"),
        ("unlikelybiz", "unlikelybizfield"),
        ("veryunlikelybiz", "veryunlikelybizfield"),
    )

    for player in positon_players.values():
        # Plays the player would make on the shared workload.
        expected_plays = 0
        for chances_key, fielded_key in buckets:
            if player[chances_key] > 0:
                expected_plays += (player[fielded_key] / player[chances_key]
                                   * normalized_data[chances_key])
            else:
                # No chances in this bucket: it contributes nothing.
                expected_plays += 0

        if normalized_data["totbiz"] > 0:
            player["adj_play_pct"] = expected_plays / normalized_data["totbiz"]
        else:
            player["adj_play_pct"] = 0

        if player["fieldingip"] > 0:
            innings = ip_to_num(player["fieldingip"])
            fielded = (player["routinebizfield"] + player["likelybizfield"]
                       + player["evenbizfield"] + player["unlikelybizfield"]
                       + player["veryunlikelybizfield"])
            # Counting stats are rescaled by 9 * 162 per converted inning
            # (presumably a per-162-game rate -- confirm with callers).
            player["adj_fieldedballs"] = fielded / innings * 9 * 162
            player["adj_totbiz"] = player["totalbiz"] / innings * 9 * 162
            player["adj_zr"] = player["zr"] / innings * 9 * 162
        else:
            player["adj_fieldedballs"] = 0
            player["adj_totbiz"] = 0
            player["adj_zr"] = 0
Exemplo n.º 2
0
def _blend_by_handedness(vr_share, vs_right, vs_left):
    """Return the average of two split values weighted by the vR share."""
    return vr_share * vs_right + (1 - vr_share) * vs_left


def calculate_league_stats(ovr_data, vl_data, vr_data, splits):
    """Derive rate stats for every card and blend the platoon splits.

    The three data sets are mutated in place; nothing is returned.

    Steps:
      1. Sum batting, baserunning and zone-rating runs over ``ovr_data`` to
         get a per-PA league adjustment.
      2. Hand the batter / pitcher stat formulas to
         ``_calculate_stats_per_data`` for the vL, vR and overall data.
      3. For each overall card, write ``batr_600_pa_*`` and
         ``war_600_pa_*`` (suffixes ``ft``, ``vr``, ``vl``) as a weighted
         average of the card's vR and vL values, using the vR share from
         ``splits``.  Cards without both splits, or with fewer than 10 PA in
         either, get -10 for all six keys.

    Args:
        ovr_data: overall stats per card; the code looks cards up in the
            split data by ``str(card["t_CID"])``.
        vl_data: stats against left-handers, same keying.
        vr_data: stats against right-handers, same keying.
        splits: nested mapping read as ``splits[role]["vR%"][position]``
            with role in ``"FT"``, ``"vR"``, ``"vL"`` and position in
            ``"catcher"``, ``"fielder"``.
    """
    tot_batr = 0.0
    tot_bsr = 0.0
    tot_zr = 0.0
    tot_pas = 0
    for player_data in ovr_data.values():
        tot_batr += player_data["batr"]
        tot_bsr += player_data["baserunningruns"]
        tot_zr += player_data["zr"]
        tot_pas += player_data["pa"]
    # Negative league-wide runs per PA.  Guarded so that an empty data set
    # (or one without plate appearances) no longer raises ZeroDivisionError.
    league_adj = (-1 * (tot_batr + tot_bsr + tot_zr) / tot_pas
                  if tot_pas > 0 else 0)
    replacement_player_adj = 228000 / 185553

    batter_stats = {
        "avg":
        lambda pd: pd["hits"] / pd["ab"] if pd["ab"] > 0 else 0,
        "obp":
        lambda pd:
        (pd["hits"] + pd["walks"] + pd["timeshitbypitch"]) / pd["pa"]
        if pd["pa"] > 0 else 0,
        # Slugging (total bases / AB) plus on-base percentage.
        "ops":
        lambda pd:
        (pd["hits"] + pd["doubles"] + pd["triples"] * 2
         + pd["homeruns"] * 3) / pd["ab"]
        + (pd["hits"] + pd["walks"] + pd["timeshitbypitch"]) / pd["pa"]
        if pd["ab"] > 0 else 0,
        # Always reads the overall record, even when computing split data.
        "bsr_600_pa":
        lambda pd:
        ovr_data[str(pd["t_CID"])]["baserunningruns"] * 600
        / ovr_data[str(pd["t_CID"])]["pa"]
        if pd["pa"] > 0 else 0,
        "zr_600_pa":
        lambda pd: pd["zr"] * 600 / pd["pa"] if pd["pa"] > 0 else 0,
        "batr_600_pa":
        lambda pd: pd["batr"] * 600 / pd["pa"] if pd["pa"] > 0 else 0,
        # last number is replacement adjustment, so it's not based on average
        # NOTE(review): league_adj is a per-PA figure but is added to the
        # player's run totals before the division by PA -- confirm intended.
        "war_600_pa":
        lambda pd:
        (pd["batr"] + pd["zr"] + pd["baserunningruns"] + league_adj
         + replacement_player_adj) * 600 / (pd["pa"] * 10)
        if pd["pa"] > 0 else 0,
    }

    pitcher_stats = {
        "sp_k_per_9":
        lambda pd: pd["sp_k"] * 9 / ip_to_num(pd["sp_ip"])
        if pd["sp_ip"] > 0 else 0,
        "sp_bb_with_hbp_per_9":
        lambda pd:
        (pd["sp_bb"] + pd["sp_playershitbypitch"]) * 9 / ip_to_num(pd["sp_ip"])
        if pd["sp_ip"] > 0 else 0,
        "sp_hr_per_9":
        lambda pd: pd["sp_hra"] * 9 / ip_to_num(pd["sp_ip"])
        if pd["sp_ip"] > 0 else 0,
        "rp_k_per_9":
        lambda pd: pd["rp_k"] * 9 / ip_to_num(pd["rp_ip"])
        if pd["rp_ip"] > 0 else 0,
        "rp_bb_with_hbp_per_9":
        lambda pd:
        (pd["rp_bb"] + pd["rp_playershitbypitch"]) * 9 / ip_to_num(pd["rp_ip"])
        if pd["rp_ip"] > 0 else 0,
        "rp_hr_per_9":
        lambda pd: pd["rp_hra"] * 9 / ip_to_num(pd["rp_ip"])
        if pd["rp_ip"] > 0 else 0,
        # Only meaningful for pure starters / pure relievers.
        # NOTE(review): these two divide the raw IP value, unlike every other
        # formula here which converts it with ip_to_num -- confirm intended.
        "ip_per_gamesstarted":
        lambda pd: pd["sp_ip"] / pd["gamesstarted"]
        if pd["gamesstarted"] > 0 and pd["gamesstarted"] == pd["games"] else 0,
        "ip_per_gamesrelieved":
        lambda pd: pd["rp_ip"] / pd["games"]
        if pd["games"] > 0 and pd["gamesstarted"] == 0 else 0,
        # Pitchers with no starter innings get a -50 sentinel.
        "sp_war_per_220_ip":
        lambda pd: pd["sp_war"] * 220 / ip_to_num(pd["sp_ip"])
        if pd["sp_ip"] > 0 else -50,
        "rp_war_per_100_ip":
        lambda pd: pd["rp_war"] * 100 / ip_to_num(pd["rp_ip"])
        if pd["rp_ip"] > 0 else 0,
    }

    _calculate_stats_per_data(vl_data, batter_stats, pitcher_stats, "vL")
    _calculate_stats_per_data(vr_data, batter_stats, pitcher_stats, "vR")
    _calculate_stats_per_data(ovr_data, batter_stats, pitcher_stats, "ovr")

    blended_stats = ("batr_600_pa", "war_600_pa")
    # (key suffix written on the card, key into ``splits``)
    roles = (("ft", "FT"), ("vr", "vR"), ("vl", "vL"))

    progress_bar = ProgressBar(len(ovr_data), "Adding extra ovr data")
    for card in ovr_data.values():
        cid = str(card["t_CID"])
        lefty_stats = vl_data.get(cid)
        righty_stats = vr_data.get(cid)
        if (lefty_stats is None or righty_stats is None
                or lefty_stats["pa"] < 10 or righty_stats["pa"] < 10):
            # Set to some default low values
            for stat in blended_stats:
                for suffix, _ in roles:
                    card[stat + "_" + suffix] = -10
            progress_bar.increment()
            continue

        position = "catcher" if card["position"] == "C" else "fielder"
        for stat in blended_stats:
            for suffix, role in roles:
                vr_share = splits[role]["vR%"][position]
                # Weighted average: vr_share of the vR value plus the
                # remaining (1 - vr_share) of the vL value.  The complement
                # must be taken before multiplying by the vL value.
                card[stat + "_" + suffix] = _blend_by_handedness(
                    vr_share, righty_stats[stat], lefty_stats[stat])
        progress_bar.increment()
    progress_bar.finish()
    print()
Exemplo n.º 3
0
def merge_player_data(old_info, new_info):
    """Fold ``new_info`` into ``old_info`` in place and return ``old_info``.

    Three kinds of fields are handled:

    * rate stats (``cera``, ``sp_fip``, ``rp_fip``, ``leverage``, ``woba``,
      ERA and WHIP) become an average of the two records weighted by
      innings, batters faced or plate appearances -- computed first, while
      both records still hold their own weights;
    * counting stats are added; innings fields are combined with
      ``add_ip`` instead of ``+``;
    * fielding percentages are recomputed from the merged counts when the
      merged ``totalbiz`` is positive.

    Args:
        old_info: player dict that receives the merged values (mutated).
        new_info: player dict that is merged in (only read).

    Returns:
        The same ``old_info`` object.
    """

    def weighted_mean(stat, old_weight, new_weight, total_weight):
        return (old_info[stat] * old_weight
                + new_info[stat] * new_weight) / total_weight

    # --- weighted rate stats (must run before the weights are summed) ---
    fielding_total = ip_to_num(
        add_ip(old_info["fieldingip"], new_info["fieldingip"]))
    if fielding_total > 0:
        old_info["cera"] = weighted_mean(
            "cera", ip_to_num(old_info["fieldingip"]),
            ip_to_num(new_info["fieldingip"]), fielding_total)

    sp_bf_total = new_info["sp_bf"] + old_info["sp_bf"]
    if sp_bf_total > 0:
        old_info["sp_fip"] = weighted_mean(
            "sp_fip", old_info["sp_bf"], new_info["sp_bf"], sp_bf_total)

    rp_bf_total = new_info["rp_bf"] + old_info["rp_bf"]
    if rp_bf_total > 0:
        for stat in ("rp_fip", "leverage"):
            old_info[stat] = weighted_mean(
                stat, old_info["rp_bf"], new_info["rp_bf"], rp_bf_total)

    pa_total = new_info["pa"] + old_info["pa"]
    if pa_total > 0:
        old_info["woba"] = weighted_mean(
            "woba", old_info["pa"], new_info["pa"], pa_total)

    for ip_key, rate_keys in (("sp_ip", ("sp_era", "sp_whip")),
                              ("rp_ip", ("rp_era", "rp_whip"))):
        ip_total = ip_to_num(add_ip(old_info[ip_key], new_info[ip_key]))
        if ip_total > 0:
            old_ip = ip_to_num(old_info[ip_key])
            new_ip = ip_to_num(new_info[ip_key])
            for stat in rate_keys:
                old_info[stat] = weighted_mean(stat, old_ip, new_ip, ip_total)

    # --- additive stats, in their original update order ---
    innings_keys = ("sp_ip", "rp_ip", "fieldingip")
    additive_keys = (
        "sp_hra", "sp_hitsallowed", "sp_k", "sp_bb",
        "rp_hra", "rp_hitsallowed", "rp_k", "rp_bb",
        "pa", "gs", "g", "fielding_gs", "runsscored", "rbi",
        "intentionallywalked", "sacflies", "sacbunts", "gidp",
        "stolenbases", "caughtstealing", "ab",
        "sp_ip", "sp_war", "rp_ip", "rp_war", "fieldingip",
        "bwar", "wraa", "batr", "sp_bf", "rp_bf", "games", "gamesstarted",
        "wsb", "ubr", "baserunningruns", "bsrunchances", "stealchances",
        "zr", "strikeouts", "walks", "homeruns", "hits", "doubles",
        "triples",
        "routinebiz", "routinebizfield", "likelybiz", "likelybizfield",
        "evenbiz", "evenbizfield", "unlikelybiz", "unlikelybizfield",
        "veryunlikelybiz", "veryunlikelybizfield", "impossiblebiz",
        "totalbiz", "totalchances", "assists", "putouts",
        "stolenbaseattempts", "runnersthrownout", "errors", "doubleplays",
        "sp_playershitbypitch", "rp_playershitbypitch", "timeshitbypitch",
        "totalplaysfielded",
        "sp_wildpitches", "sp_pitch_stealchances", "sp_pitch_basesstolen",
        "sp_pitch_stealattempts", "sp_pitch_basesstolen_wild_pitch",
        "sp_earned_runs", "sp_pitch_non_hr_hits", "sp_bb_hp",
        "sp_pitcherdoubleplays",
        "rp_wildpitches", "rp_pitch_stealchances", "rp_pitch_basesstolen",
        "rp_pitch_stealattempts", "rp_pitch_basesstolen_wild_pitch",
        "rp_earned_runs", "rp_pitch_non_hr_hits", "rp_bb_hp",
        "rp_pitcherdoubleplays",
    )
    for stat in additive_keys:
        if stat in innings_keys:
            # Innings are not plain numbers; they need the dedicated adder.
            old_info[stat] = add_ip(old_info[stat], new_info[stat])
        else:
            old_info[stat] += new_info[stat]

    # --- percentages derived from the merged counts ---
    if old_info["totalbiz"] > 0:
        old_info["playpct"] = (old_info["totalplaysfielded"]
                               / old_info["totalbiz"])
        old_info["routinefieldpct"] = (old_info["routinebiz"]
                                       / old_info["totalbiz"])
        # (percentage key, fielded key, chances key)
        bucket_keys = (
            ("routinebizpct", "routinebizfield", "routinebiz"),
            ("likelybizpct", "likelybizfield", "likelybiz"),
            ("evenbizpct", "evenbizfield", "evenbiz"),
            ("unlikelybizpct", "unlikelybizfield", "unlikelybiz"),
            ("veryunlikelybizpct", "veryunlikelybizfield",
             "veryunlikelybiz"),
        )
        for pct_key, fielded_key, chances_key in bucket_keys:
            if old_info[chances_key] > 0:
                old_info[pct_key] = (old_info[fielded_key]
                                     / old_info[chances_key])
            else:
                old_info[pct_key] = 0
    return old_info
Exemplo n.º 4
0
def _fit_without_outliers(X, y):
    """Fit OLS of y on X (plus a constant), drop high-influence points, refit.

    A point is dropped when its Cook's distance from the first fit is at or
    above ``4 / (n - 2)``; with fewer than four points nothing is dropped.
    Returns the results object of the second fit.
    """
    first_fit = sm.OLS(y, sm.add_constant(X)).fit()
    cooks_distance = first_fit.get_influence().cooks_distance[0]
    sample_size = len(X)
    cutoff = 4.0 / ((sample_size - 2) if sample_size > 3 else 1)
    keep_everything = sample_size < 4

    kept_X = []
    kept_y = []
    # zip keeps every distance paired with the point it was computed for.
    for x_value, y_value, distance in zip(X, y, cooks_distance):
        if distance < cutoff or keep_everything:
            kept_X.append(x_value)
            kept_y.append(y_value)
    return sm.OLS(kept_y, sm.add_constant(kept_X)).fit()


def regress_rto(player_data):
    """Regress steal attempts and throw-out rate on the ``carm`` rating.

    Players are used only if they have at least one runner thrown out,
    ``cera`` >= 2.0, ``carm`` >= 15 and ``fieldingip`` >= 10.0.  Their
    numbers are pooled per ``carm`` value, then two linear fits are made,
    each refitted after removing high-influence points:

    * steal attempts per pooled fielding innings vs. ``carm``
    * runners thrown out per steal attempt vs. ``carm``

    Args:
        player_data: mapping whose values are player dicts (presumably
            catchers -- the ratings read are ``carm`` and ``cabi``).

    Returns:
        Tuple ``(att_params, att_r2, rto_params, rto_r2, avg_carm,
        avg_cabi)`` where the averages are weighted by converted fielding
        innings of the qualifying players.
    """
    avg_cabi = 0.0
    avg_carm = 0.0
    avg_ip = 0.0
    attempts_by_arm = {}  # carm -> (steal attempts, pooled fielding IP)
    rto_by_arm = {}  # carm -> (runners thrown out, steal attempts)
    for player_info in player_data.values():
        if (player_info["runnersthrownout"] < 1
                or player_info["cera"] < 2.0
                or player_info["carm"] < 15
                or player_info["fieldingip"] < 10.0):
            continue

        carm = player_info["carm"]
        cabi = player_info["cabi"]
        innings = ip_to_num(player_info["fieldingip"])
        # Running innings-weighted means of both ratings.
        avg_carm = (avg_carm * avg_ip + carm * innings) / (avg_ip + innings)
        avg_cabi = (avg_cabi * avg_ip + cabi * innings) / (avg_ip + innings)
        avg_ip += innings

        if carm in attempts_by_arm:
            attempts_by_arm[carm] = (
                player_info["stolenbaseattempts"] + attempts_by_arm[carm][0],
                add_ip(player_info["fieldingip"], attempts_by_arm[carm][1]))
            rto_by_arm[carm] = (
                player_info["runnersthrownout"] + rto_by_arm[carm][0],
                player_info["stolenbaseattempts"] + rto_by_arm[carm][1])
        else:
            attempts_by_arm[carm] = (player_info["stolenbaseattempts"],
                                     player_info["fieldingip"])
            rto_by_arm[carm] = (player_info["runnersthrownout"],
                                player_info["stolenbaseattempts"])

    # Build the sample once so that both the first fit and the outlier pass
    # see exactly the same points.  Previously only the first fit skipped
    # ratings with zero pooled innings, which could misalign the Cook's
    # distance indices and divide by zero in the second pass.
    arms = [carm for carm in attempts_by_arm
            if attempts_by_arm[carm][1] != 0]
    # NOTE(review): the denominator is the pooled IP value as stored, not
    # ip_to_num() of it -- confirm this is intended.
    attempt_rates = [attempts_by_arm[carm][0] / attempts_by_arm[carm][1]
                     for carm in arms]
    throw_out_rates = [rto_by_arm[carm][0] / rto_by_arm[carm][1]
                       for carm in arms]

    att_results = _fit_without_outliers(arms, attempt_rates)
    rto_results = _fit_without_outliers(arms, throw_out_rates)

    # attempts by arm, r2, rto/att by arm, r2
    return (att_results.params, att_results.rsquared, rto_results.params,
            rto_results.rsquared, avg_carm, avg_cabi)
Exemplo n.º 5
0
def calculate_defensive_stats(tourney_data, cards):
    """Fit per-position fielding regressions and project them onto cards.

    Only tournament players who appear at exactly one of the eight fielding
    positions (and whose ``pos`` is one of them) are used.  Their
    ball-in-zone counts are pooled per position and scaled by
    ``9 / innings``; each player then gets normalised metrics via
    ``_calculate_normalized_play_pct``; finally ``regress_defensive_stats``
    is run for three outcome stats per position and the resulting models
    are applied to every card.

    Args:
        tourney_data: mapping of key -> player dict with fielding data.
        cards: iterable of card dicts; each gains ``<POS>_expected_zr`` and
            ``<POS>_expected_outs_above_avg`` for all eight positions.

    Prints the r2 of every regression; returns nothing.
    """
    positions = ["C", "1B", "2B", "3B", "SS", "LF", "CF", "RF"]
    biz_keys = (
        "totbiz", "routinebiz", "likelybiz", "evenbiz", "unlikelybiz",
        "veryunlikelybiz", "routinebizfield", "likelybizfield",
        "evenbizfield", "unlikelybizfield", "veryunlikelybizfield",
    )

    position_players_data = {position: {} for position in positions}
    position_biz_breakdown = {
        position: dict.fromkeys(biz_keys, 0) for position in positions
    }
    total_fielding_ip_per_pos = {}

    # Use these to calculate avg play%
    # NOTE(review): both totals are accumulated below but never read again
    # inside this function.
    tot_biz_per_pos = dict.fromkeys(positions, 0)
    tot_fieldedbiz_per_pos = dict.fromkeys(positions, 0)

    # Used to calculate outputs
    regression_data = {position: {} for position in positions}
    r2_data = {position: {} for position in positions}

    progress_bar = ProgressBar(len(tourney_data.keys()), "Reading tourney players for fielding data")

    for key, pi in tourney_data.items():
        progress_bar.increment()
        # Keep single-position players only, so their numbers are not a mix
        # of several positions.
        positions_played = [
            position for position in positions
            if pi[position.lower() + "x"] > 0
        ]
        if len(positions_played) > 1:
            continue
        pos = pi["pos"]
        if pos not in position_players_data:
            continue

        tot_biz_per_pos[pos] += pi["totalbiz"]
        tot_fieldedbiz_per_pos[pos] += pi["totalplaysfielded"]

        breakdown = position_biz_breakdown[pos]
        breakdown["totbiz"] += pi["totalbiz"]
        for biz_key in biz_keys[1:]:
            breakdown[biz_key] += pi[biz_key]

        total_fielding_ip_per_pos[pos] = add_ip(
            pi["fieldingip"], total_fielding_ip_per_pos.get(pos, 0))
        position_players_data[pos][key] = pi
    progress_bar.finish()

    for position, breakdown in position_biz_breakdown.items():
        innings = ip_to_num(total_fielding_ip_per_pos[position])
        for biz_key in breakdown:
            # Per game stats
            breakdown[biz_key] = breakdown[biz_key] / innings * 9

    # Ratings fed to the play% / ZR regressions, and the single range
    # rating fed to the total-chances regression, per position.
    catcher_ratings = ("carm", "cabi")
    infield_ratings = ("ifrng", "ifarm", "iferr", "tdp")
    outfield_ratings = ("ofrng", "ofarm", "oferr")
    ratings_by_position = (
        ("C", catcher_ratings, "ifrng"),
        ("1B", infield_ratings + ("height",), "ifrng"),
        ("2B", infield_ratings, "ifrng"),
        ("3B", infield_ratings, "ifrng"),
        ("SS", infield_ratings, "ifrng"),
        ("LF", outfield_ratings, "ofrng"),
        ("CF", outfield_ratings, "ofrng"),
        ("RF", outfield_ratings, "ofrng"),
    )
    # Every regression gets its own fresh list of rating names.
    position_info = [
        (position, [
            ("adj_play_pct", list(ratings)),
            ("adj_zr", list(ratings)),
            ("adj_totbiz", [range_rating]),
        ])
        for position, ratings, range_rating in ratings_by_position
    ]

    progress_bar = ProgressBar(len(position_info) * len(position_info[0][1]), "Calculating fielding regressions")
    for position, pos_regs in position_info:
        players = position_players_data[position]
        _calculate_normalized_play_pct(players, position_biz_breakdown[position])
        for outcome_stat, pos_ratings in pos_regs:
            reg, r2 = regress_defensive_stats(players, pos_ratings, outcome_stat)
            regression_data[position][outcome_stat] = reg
            r2_data[position][outcome_stat] = r2

            progress_bar.increment()
    progress_bar.finish()

    for position, r2_by_stat in r2_data.items():
        for outcome_stat, r2 in r2_by_stat.items():
            print(position, outcome_stat, "r2:", r2)
    print()

    outs_above_average_per_162 = {}
    zr_per_162 = {}
    for position in regression_data:
        outs_above_average_per_162[position] = _get_oaa_fn(position, regression_data, position_biz_breakdown)
        zr_per_162[position] = _get_zr_fn(position, regression_data)

    progress_bar = ProgressBar(len(cards), "Calculate in defensive stats to cards")
    for card in cards:
        for position in positions:
            card[position + "_expected_zr"] = zr_per_162[position](card)
            card[position + "_expected_outs_above_avg"] = outs_above_average_per_162[position](card)
        progress_bar.increment()
    progress_bar.finish()
Exemplo n.º 6
0
def _calc_woba_factors(player_data):
    """Estimate wOBA event weights and stolen-base run values by OLS.

    Two regressions are fitted over players with at least 20 PA:

    * ``woba * (pa - ibb)`` on an intercept column, walks, HBP, singles,
      doubles, triples and home runs;
    * ``wsb`` on stolen bases, caught stealing and times on first
      (singles + walks + HBP - intentional walks), without intercept.

    League totals, which include every player regardless of PA, are then
    plugged into the first model to obtain the league wOBA.

    Args:
        player_data: mapping whose values are player stat dicts.

    Returns:
        Dict with the fitted factors, both r-squared values, ``lg_woba``,
        ``outs_per_run`` and ``runs_per_win``.
    """
    X = []
    y = []
    X_wsb = []
    y_wsb = []
    total_ip = 0
    total_runs = 0
    pa = walks = hbp = singles = doubles = triples = homeruns = ibb = 0

    progress_bar = ProgressBar(len(player_data.keys()), "Reading wOBA values")

    for player in player_data.values():
        player_singles = player["hits"] - (
            player["homeruns"] + player["doubles"] + player["triples"])

        # League totals take every player into account.
        pa += player["pa"]
        walks += player["walks"]
        hbp += player["timeshitbypitch"]
        singles += player_singles
        doubles += player["doubles"]
        triples += player["triples"]
        homeruns += player["homeruns"]
        ibb += player["intentionallywalked"]

        total_ip = add_ip(add_ip(total_ip, player["sp_ip"]), player["rp_ip"])
        total_runs += player["runsscored"]

        progress_bar.increment()

        # Small samples are left out of the regressions.
        if player["pa"] < 20:
            continue

        X.append([
            1,
            player["walks"],
            player["timeshitbypitch"],
            player_singles,
            player["doubles"],
            player["triples"],
            player["homeruns"],
        ])
        y.append(player["woba"]
                 * (player["pa"] - player["intentionallywalked"]))

        times_on_first = (player_singles + player["walks"]
                          + player["timeshitbypitch"]
                          - player["intentionallywalked"])
        X_wsb.append([
            player["stolenbases"], player["caughtstealing"], times_on_first
        ])
        y_wsb.append(player["wsb"])
    progress_bar.finish("\n")

    results = sm.OLS(y, X).fit()
    wsb_results = sm.OLS(y_wsb, X_wsb).fit()

    (intercept, walk_w, hbp_w, single_w, double_w, triple_w,
     homerun_w) = results.params
    run_sb, run_cs, lg_wsb = wsb_results.params

    avg_woba = (intercept + walk_w * walks + hbp_w * hbp
                + single_w * singles + double_w * doubles
                + triple_w * triples + homerun_w * homeruns) / (pa - ibb)

    factors = {}
    factors["lg_woba"] = avg_woba
    factors["woba_intcpt"] = intercept
    factors["walks_factor"] = walk_w
    factors["hbp_factor"] = hbp_w
    factors["singles_factor"] = single_w
    factors["doubles_factor"] = double_w
    factors["triples_factor"] = triple_w
    factors["homeruns_factor"] = homerun_w
    factors["r_2"] = results.rsquared
    factors["runSB"] = run_sb
    factors["runCS"] = run_cs
    factors["lgwSB"] = lg_wsb
    factors["wsb_r_2"] = wsb_results.rsquared
    factors["outs_per_run"] = (run_cs - 0.075) / 2
    factors["runs_per_win"] = (
        9 * (total_runs / ip_to_num(total_ip)) * 1.5 + 3)
    return factors