Exemplo n.º 1
0
def get_optimal_lineups(year, season_gp):
    optimal_query = """SELECT team_abb, 
    starter_val, bullpen_val, 
    l.lineup_val AS lineup_vsL, r.lineup_val AS lineup_vsR,
    total_val + 0.25*(l.lineup_val) + 0.75*(r.lineup_val) AS roster_WAR,
    starter_var, bullpen_var,
    l.lineup_var AS vsL_var, r.lineup_var AS vsR_var,
    total_var + 0.25*l.lineup_var + 0.75*r.lineup_var AS roster_var
    FROM __optimal_pitching p
    JOIN __optimal_lineups l USING (team_abb)
    JOIN __optimal_lineups r USING (team_abb)
    WHERE l.vs_hand = 'l'
    AND r.vs_hand = 'r'
    AND l.dh_name IS NOT NULL
    AND r.dh_name IS NOT NULL
    ORDER BY team_abb ASC;"""

    total_roster_war_query = """SELECT
    SUM(p.total_val + 0.25*(l.lineup_val) + 0.75*(r.lineup_val)) AS roster_WAR
    FROM __optimal_pitching p
    JOIN __optimal_lineups l USING (team_abb)
    JOIN __optimal_lineups r USING (team_abb)
    WHERE l.vs_hand = 'l'
    AND r.vs_hand = 'r'
    AND l.dh_name IS NOT NULL
    AND r.dh_name IS NOT NULL;"""

    # should be around ~1000
    total_roster_war = db.query(total_roster_war_query)[0][0]

    # should be around 48 (48-114 replacement level?)
    replacement_team_wins = (2430-float(total_roster_war))/30

    # should be around .300
    rep_team_win_pct = float(replacement_team_wins)/162

    optimal_res = db.query(optimal_query)

    for row in optimal_res:
        entry = {}
        team_abb, starter_val, bullpen_val, lu_vsL, lu_vsR, roster_WAR, starter_var, bullpen_var, vsL_var, vsR_var, roster_var = row

        mascot_name = helper.get_mascot_names(team_abb.upper(), year)
        team_name, games_played, rep_WAR, oWAR, dWAR, FIP_WAR, W, L, py_W, py_L = get_standing_metrics(year, mascot_name)
        team_abb = helper.get_team_abb(team_name, year)
        # mascot_name = helper.get_mascot_names(team_abb.upper(), year-1)
        # team_name, games_played, rep_WAR, oWAR, dWAR, FIP_WAR, W, L, py_W, py_L = get_standing_metrics(year, mascot_name)
        # team_abb = helper.get_team_abb(team_name, year-1)


        games_played = float(games_played)

        if games_played > 162.0:
            roster_W = float(roster_WAR) + rep_team_win_pct*games_played
            roster_pct = roster_W/games_played
            ros_g = 0
        else:
            roster_W = float(roster_WAR) + rep_team_win_pct*162
            roster_pct = roster_W/162.0
            ros_g = 162-games_played

        try:
            w_pct = float(W)/float(W+L)
            py_pct = float(py_W)/float(py_W+py_L)
        except ZeroDivisionError:
            w_pct = 0.5
            py_pct = 0.5


        # logistic weights for pythag% and win&
        # rest of season projected win% = (1-2w)*(roster%) + w(pythag%) + w(win%)
        # where w = (0.25) / (1+20e^(-0.035*games_played))
        if games_played <= 10:
            current_weight = 0.0015*float(games_played)
        else:
            current_weight = 0.25 / (1 + 20*math.exp(-0.035*float(games_played)))

        ros_pct = (1-2*current_weight)*roster_pct + (current_weight)*max(py_pct, 0.25) + (current_weight)*max(w_pct, 0.25)

        ros_W = ros_pct*ros_g


        # for the total amount of variance for the team, we first take the total amount of variance from team projections (based on the variance in each individual player's projection)
        total_roster_var = float(roster_var) 
        # then we add a measure of variance based on the difference between true talent record (pythag record) and observed record (see /variance_research/Full Season Pythag Standings std research.png)
        total_roster_var += -0.0055021865*(ros_pct*162) + 3.4706743014
        # Finally we add a value of 5.0 to the STANDARD DEVIATION (not variance). We can express the amount of variance desired to add in the set of equations {std = sqrt(v), std+5.0 = sqrt(v+c)}, and then solving for c (https://tinyurl.com/y8tk64ez)
        # NB. the value of 5.0 is a guess (~0.33 win for each starter plus a small amount for bench players and relief pitchers) and hack-y and should be cleaned up, or at least weighted more towards defensive #s over wOBA numbers) wins to the variance due to my uncertain nature (mostly from defense) of my conversion from raw ZiPS to DMB WAR (i.e., I think if my projection says the team is a true talent 90 win team, I think there is +/- 5.0 wins of standard deviation in that projection)
        total_roster_var += 10*math.sqrt(total_roster_var) + 25

        



        projected_W = W + ros_W
        

        if games_played > 162.0:
            roster_L = games_played - roster_W
            projected_L = games_played - projected_W
            projected_pct = projected_W/games_played
        else:
            roster_L = 162.0 - roster_W
            projected_L = 162.0 - projected_W
            projected_pct = projected_W/162.0

        entry['team_abb'] = team_abb
        entry['team_name'] = team_name
        entry['year'] = year
        entry['season_gp'] = season_gp
        entry['games_played'] = games_played
        entry['starter_val'] = starter_val
        entry['bullpen_val'] = bullpen_val
        entry['vsR_val'] = lu_vsR
        entry['vsL_val'] = lu_vsL
        entry['roster_strength'] = roster_WAR
        entry['starter_var'] = starter_var
        entry['bullpen_var'] = bullpen_var
        entry['vsR_var'] = vsR_var
        entry['vsL_var'] = vsL_var
        entry['roster_var'] = roster_var
        entry['overall_var'] = total_roster_var    
        entry['roster_W'] = roster_W
        entry['roster_L'] = roster_L
        entry['roster_pct'] = roster_pct
        entry['current_W'] = W
        entry['current_L'] = L
        entry['current_pct'] = w_pct
        entry['ros_W'] = ros_W
        entry['ros_L'] = ros_g - ros_W
        entry['ros_pct'] = ros_pct
        entry['projected_W'] = projected_W
        entry['projected_L'] = projected_L
        entry['projected_pct'] = projected_pct

        # raw_input(entry)
        db.insertRowDict(entry, '__team_strength', insertMany=False, replace=True, rid=0,debug=1)
        db.conn.commit()
Exemplo n.º 2
0
def process():
    print "processed_team_standings_advanced"
    table = 'processed_team_standings_advanced'
    db.query("TRUNCATE TABLE `" + table + "`")

    entries = []
    teamWAR_qry = """SELECT
    year,
    team_abb,
    dWAR,
    oWAR,
    (replacement/10) as repWAR,
    FIP_WAR,
    ERA_WAR
    FROM processed_WAR_team
    """

    team_WAR_list = db.query(teamWAR_qry)

    for team in team_WAR_list:
        year, team_abb, dWAR, oWAR, repWAR, FIP_WAR, ERA_WAR = team

        mascot_name = helper.get_mascot_names(team_abb.upper(), year)

        #a full season is ~17 replacement wins?
        repWAR = float(repWAR)

        pos_WAR = float(dWAR) + float(oWAR) + repWAR
        fWAR = pos_WAR + float(FIP_WAR)
        rWAR = pos_WAR + float(ERA_WAR)

        if team_abb == '':
            continue
        else:
            record_q = """SELECT
    year,
    team_name, 
    games_played, 
    w,
    l,
    rf,
    ra
    FROM team_standings
    WHERE team_name LIKE '%%%s%%'
    AND year = %s
    AND games_played = (SELECT MAX(games_played) FROM team_standings WHERE team_name LIKE '%%%s%%' AND year = %s)
    """
            record_qry = record_q % (mascot_name, year, mascot_name, year)
            # raw_input(record_qry)

            record = db.query(record_qry)[0]

            year, team_name, games_played, w, l, rf, ra = record

            # http://www.had2know.com/sports/pythagorean-expectation-win-percentage-baseball.html
            pythag_x = ((float(rf) + float(ra)) /
                        (float(w) + float(l)))**(float(0.285))
            pythag_win_pct = (float(rf)**pythag_x) / ((float(rf)**pythag_x) +
                                                      (float(ra)**pythag_x))
            pythag_wins = (w + l) * pythag_win_pct
            pythag_losses = games_played - (pythag_wins)

            if year < 2017:
                rep_team_win_pct = 0.300
            else:
                rep_team_win_pct = 0.325
            rep_team_wins = rep_team_win_pct * games_played

            # f_wins = (pos_WAR/repWAR)*17.0 + float(FIP_WAR) + rep_team_wins
            # f_losses = games_played - (f_wins)
            # r_wins = (pos_WAR/repWAR)*17.0 + float(ERA_WAR) + rep_team_wins
            # r_losses = games_played - (r_wins)

            f_wins = fWAR + rep_team_wins
            f_losses = games_played - (f_wins)
            r_wins = rWAR + rep_team_wins
            r_losses = games_played - (r_wins)

            entry = {
                "year": year,
                "team_name": team_name,
                "games_played": games_played,
                "repWAR": repWAR,
                "oWAR": oWAR,
                "dWAR": dWAR,
                "FIP_WAR": FIP_WAR,
                "ERA_WAR": ERA_WAR,
                "RF": rf,
                "RA": ra,
                "f_Wins": f_wins,
                "f_Losses": f_losses,
                "r_Wins": r_wins,
                "r_Losses": r_losses,
                "py_Wins": pythag_wins,
                "py_Losses": pythag_losses,
                "W": w,
                "L": l
            }

            entries.append(entry)

    if entries != []:
        db.insertRowDict(entries, table, replace=True, insertMany=True, rid=0)
    db.conn.commit()