Пример #1
0
def get_table_row_dict(soup, table_name, table_row_label, table_column_label):
    """Return the first body row of a table whose given column equals a label.

    :param soup: parsed document exposing BeautifulSoup-style find()/findAll()
    :param table_name: value of the "id" attribute of the table to search
    :param table_row_label: cell value that identifies the wanted row
    :param table_column_label: header text of the column that is compared
        against table_row_label
    :return: dict mapping every header text to the text of the matching
        row's cell in the same position (0 when the cell text is empty)
    :raises BaseballReference.TableNotFound: if the table is missing or no
        row matches
    :raises KeyError: if table_column_label is not one of the header texts
    """
    results_table = soup.find("table", {"id": table_name})
    if results_table is None:
        raise BaseballReference.TableNotFound(table_name)

    header_cells = results_table.find("thead").findAll("th")
    table_header_list = [x.text for x in header_cells]
    stat_rows = results_table.find("tbody").findAll("tr")

    for stat_row in stat_rows:
        stat_entries = stat_row.findAll("td")
        # A row with fewer cells than headers (e.g. a repeated header or a
        # spacer row) cannot be mapped onto the headers; skip it instead of
        # failing with an IndexError.
        if len(stat_entries) < len(table_header_list):
            continue

        # Create a dictionary of the stat attributes; empty cells become 0
        stat_dict = {
            header: (entry.text if entry.text != "" else 0)
            for header, entry in zip(table_header_list, stat_entries)
        }
        if stat_dict[table_column_label] == table_row_label:
            return stat_dict

    #TODO: add a TableRowNotFound exception
    raise BaseballReference.TableNotFound(table_name)
Пример #2
0
def update_pitcher_id(pitcher, database_session):
    """Make sure the database holds a PitcherEntry for this pitcher.

    An existing entry (looked up by pitcher.rotowire_id) only has its team
    refreshed when it differs. Otherwise the Baseball Reference ID is
    resolved from the pitcher's name, team and the current year, and a new
    entry is created through create_new_pitcher_entry. When the name lookup
    raises BaseballReference.NameNotFound a message is printed and nothing
    is created.

    :param pitcher: object providing name, team and rotowire_id attributes
    :param database_session: session used for the lookup and the team
        update; also passed on to create_new_pitcher_entry
    """
    db_query = database_session.query(PitcherEntry).get(pitcher.rotowire_id)
    # Found unique entry, check to make sure the team matches the database
    if db_query is not None:
        if db_query.team != pitcher.team:
            # Update the player's team in the database
            db_query.team = pitcher.team
            database_session.commit()
        return

    # Found no entries, create a bare bones entry with just the name and id.
    # The pitcher soup is only used on this path, so it is fetched here
    # instead of on every call.
    pitcher_soup = BaseballReference.get_pitcher_soup()
    name = pitcher.name.split()
    first_name = name[0]
    # Join the tokens unchanged: wrapping each one in str() raises
    # UnicodeEncodeError on Python 2 for unicode names with non-ASCII
    # characters.
    last_name = " ".join(name[1:])
    try:
        baseball_reference_id = BaseballReference.get_pitcher_id(
            first_name + " " + last_name,
            BaseballReference.team_dict.inv[team_dict[pitcher.team]],
            date.today().year, pitcher_soup)
    except BaseballReference.NameNotFound:
        print("Skipping committing this pitcher '%s %s'." % (first_name,
                                                             last_name))
        return

    create_new_pitcher_entry(pitcher, baseball_reference_id,
                             database_session)
Пример #3
0
def update_pitcher_id(pitcher, database_session):
    """Create or refresh the PitcherEntry that belongs to this pitcher.

    If an entry with primary key pitcher.rotowire_id exists, only its team is brought up to date.
    Otherwise the Baseball Reference ID is looked up from the pitcher's name, team and the current
    year and a new entry is created with create_new_pitcher_entry. If that lookup raises
    BaseballReference.NameNotFound, a message is printed and the pitcher is skipped.

    :param pitcher: object providing name, team and rotowire_id attributes
    :param database_session: session used for the lookup and team update, and handed to create_new_pitcher_entry
    """
    db_query = database_session.query(PitcherEntry).get(pitcher.rotowire_id)
    # Found unique entry, check to make sure the team matches the database
    if db_query is not None:
        if db_query.team != pitcher.team:
            # Update the player's team in the database
            db_query.team = pitcher.team
            database_session.commit()
        return

    # Found no entries, create a bare bones entry with just the name and id.
    # Only this path uses the pitcher soup, so it is not fetched for pitchers that already exist.
    pitcher_soup = BaseballReference.get_pitcher_soup()
    name = pitcher.name.split()
    first_name = name[0]
    # No str() around the tokens: on Python 2 it raises UnicodeEncodeError for non-ASCII unicode names
    last_name = " ".join(name[1:])
    try:
        baseball_reference_id = BaseballReference.get_pitcher_id(first_name + " " + last_name,
                                                                 BaseballReference.team_dict.inv[team_dict[pitcher.team]],
                                                                 date.today().year,
                                                                 pitcher_soup)
    except BaseballReference.NameNotFound:
        print("Skipping committing this pitcher '%s %s'." % (first_name, last_name))
        return

    create_new_pitcher_entry(pitcher, baseball_reference_id, database_session)
Пример #4
0
def get_game_lineups(database_session):
    """ Mine the RotoWire daily lineups page and get the players' name, team, and RotoWire ID
    Note: longer names are abbreviated by RotoWire and need to be resolved by another source
    A GameEntry is also added to the database for every game that has both pitchers listed.
    :param database_session: session passed to get_hitter/get_pitcher and used to store each GameEntry
    :return: list of Game objects representing the lineups for the day
    """
    #TODO: add feature to look if it's going to rain
    lineup_soup = BeautifulSoupHelper.get_soup_from_url(DAILY_LINEUPS_URL)
    header_nodes = lineup_soup.findAll("div", {"class": TEAM_REGION_LABEL})
    games = list()
    for header_node in header_nodes:
        game_node = header_node.parent
        away_team_abbreviation = game_node.find("div", {
            "class": AWAY_TEAM_REGION_LABEL
        }).text.split()[0]
        home_team_abbreviation = game_node.find("div", {
            "class": HOME_TEAM_REGION_LABEL
        }).text.split()[0]
        game_main_soup = game_node.find("div", {"class": LINEUPS_CLASS_LABEL})

        away_team_lineup = [
            get_hitter(away_player, away_team_abbreviation, database_session)
            for away_player in game_main_soup.findAll(
                "div", {"class": AWAY_TEAM_PLAYER_LABEL})
        ]
        home_team_lineup = [
            get_hitter(home_player, home_team_abbreviation, database_session)
            for home_player in game_main_soup.findAll(
                "div", {"class": HOME_TEAM_PLAYER_LABEL})
        ]

        try:
            pitchers = game_node.find("div",
                                      PITCHERS_REGION_LABEL).findAll("div")
            away_team_pitcher = get_pitcher(pitchers[0],
                                            away_team_abbreviation,
                                            database_session)
            home_team_pitcher = get_pitcher(pitchers[1],
                                            home_team_abbreviation,
                                            database_session)
        # No pitchers present on page: the pitcher region is missing
        # (AttributeError) or holds fewer than two nodes (IndexError)
        except (AttributeError, IndexError):
            print("Game between %s and %s is not valid." % (
                away_team_abbreviation, home_team_abbreviation))
            continue

        current_game = Game(away_team_lineup, away_team_pitcher,
                            home_team_lineup, home_team_pitcher)

        # TODO: since they only release the ump data ~1 hour before the game, we'll have to make this robust later
        try:
            game_time = game_node.find("div", {
                "class": TIME_REGION_LABEL
            }).find("a").text.replace("ET", "").strip()
            # Convert the 12-hour clock text into a 24-hour "HH:MM" string
            game_time = datetime.strptime(game_time,
                                          '%I:%M %p').strftime("%H:%M")
            game_entry = GameEntry(date.today(), game_time,
                                   home_team_abbreviation,
                                   away_team_abbreviation)
            game_entry.wind_speed = get_wind_speed(game_node)
            game_entry.ump_ks_per_game = get_ump_ks_per_game(game_node)
            game_entry.ump_runs_per_game = get_ump_runs_per_game(game_node)
            game_entry.park_hitter_score, game_entry.park_pitcher_score = BaseballReference.get_team_info(
                team_dict[home_team_abbreviation])

            database_session.add(game_entry)
            database_session.commit()
        except IntegrityError:
            database_session.rollback()
            print("Warning: attempt to duplicate game entry: %s %s %s %s" % (
                str(home_team_abbreviation), str(away_team_abbreviation),
                str(game_entry.game_date), str(game_entry.game_time)))
        # Best effort: a failure to store the game entry must not drop the
        # lineup itself, so the error is only reported
        except Exception as e:
            print(e)

        if current_game.is_valid():
            games.append(current_game)
        else:
            print("Game between %s and %s is not valid." % (
                away_team_abbreviation, home_team_abbreviation))

    return games
Пример #5
0
def mine_yesterdays_results(database_session):
    """Store the actual results of every player with a pregame entry dated yesterday.

    For each PregameHitterGameEntry and PregamePitcherGameEntry whose
    game_date is yesterday, the player's game log row is requested from
    BaseballReference and saved as a PostgameHitterGameEntry or
    PostgamePitcherGameEntry together with the DraftKings points computed
    from it. When the lookup raises BaseballReference.TableRowNotFound the
    pregame entry is deleted instead.

    :param database_session: session used for all queries, inserts and deletes
    """
    yesterday = date.today() - timedelta(days=1)

    # Query the database for all hitter game entries from yesterday
    hitter_entries = database_session.query(PregameHitterGameEntry).filter(
        PregameHitterGameEntry.game_date == yesterday)
    for pregame_hitter_entry in hitter_entries:
        hitter_entry = database_session.query(HitterEntry).get(
            pregame_hitter_entry.rotowire_id)
        try:
            stat_row_dict = BaseballReference.get_yesterdays_hitting_game_log(
                hitter_entry.baseball_reference_id)
        except BaseballReference.TableRowNotFound:
            print("Player %s %s did not play yesterday. Deleting pregame entry %s %s" % (
                hitter_entry.first_name, hitter_entry.last_name,
                pregame_hitter_entry.game_date,
                pregame_hitter_entry.opposing_team))
            database_session.delete(pregame_hitter_entry)
            database_session.commit()
            continue

        postgame_hitter_entry = PostgameHitterGameEntry()
        postgame_hitter_entry.rotowire_id = hitter_entry.rotowire_id
        postgame_hitter_entry.game_date = pregame_hitter_entry.game_date
        postgame_hitter_entry.game_h = int(stat_row_dict["H"])
        postgame_hitter_entry.game_bb = int(stat_row_dict["BB"])
        postgame_hitter_entry.game_hbp = int(stat_row_dict["HBP"])
        postgame_hitter_entry.game_r = int(stat_row_dict["R"])
        postgame_hitter_entry.game_sb = int(stat_row_dict["SB"])
        postgame_hitter_entry.game_hr = int(stat_row_dict["HR"])
        postgame_hitter_entry.game_rbi = int(stat_row_dict["RBI"])
        postgame_hitter_entry.game_2b = int(stat_row_dict["2B"])
        postgame_hitter_entry.game_3b = int(stat_row_dict["3B"])
        # Singles are the hits that were not doubles, triples or home runs
        postgame_hitter_entry.game_1b = postgame_hitter_entry.game_h - postgame_hitter_entry.game_2b - \
                                        postgame_hitter_entry.game_3b - postgame_hitter_entry.game_hr
        postgame_hitter_entry.actual_draftkings_points = Draftkings.get_hitter_points(
            postgame_hitter_entry)
        try:
            database_session.add(postgame_hitter_entry)
            database_session.commit()
        except IntegrityError:
            database_session.rollback()
            print("Attempt to duplicate hitter postgame results: %s %s %s %s" % (
                hitter_entry.first_name, hitter_entry.last_name,
                hitter_entry.team, pregame_hitter_entry.game_date))

    # Query the database for all pitcher game entries from yesterday
    pitcher_entries = database_session.query(PregamePitcherGameEntry).filter(
        PregamePitcherGameEntry.game_date == yesterday)
    for pregame_pitcher_entry in pitcher_entries:
        pitcher_entry = database_session.query(PitcherEntry).get(
            pregame_pitcher_entry.rotowire_id)
        print("Mining yesterday for %s %s" % (pitcher_entry.first_name,
                                              pitcher_entry.last_name))
        try:
            stat_row_dict = BaseballReference.get_pitching_game_log(
                pitcher_entry.baseball_reference_id)
        except BaseballReference.TableRowNotFound:
            print("Player %s %s did not play yesterday. Deleting pregame entry %s %s" % (
                pitcher_entry.first_name, pitcher_entry.last_name,
                pregame_pitcher_entry.game_date,
                pregame_pitcher_entry.opposing_team))
            database_session.delete(pregame_pitcher_entry)
            database_session.commit()
            continue

        postgame_pitcher_entry = PostgamePitcherGameEntry()
        postgame_pitcher_entry.rotowire_id = pitcher_entry.rotowire_id
        postgame_pitcher_entry.game_date = pregame_pitcher_entry.game_date
        postgame_pitcher_entry.game_ip = float(stat_row_dict["IP"])
        postgame_pitcher_entry.game_so = int(stat_row_dict["SO"])
        # startswith() instead of [0] so an empty decision does not raise
        # IndexError
        if str(stat_row_dict["Dec"]).startswith("W"):
            postgame_pitcher_entry.game_wins = 1
        postgame_pitcher_entry.game_er = int(stat_row_dict["ER"])
        postgame_pitcher_entry.game_h = int(stat_row_dict["H"])
        postgame_pitcher_entry.game_bb = int(stat_row_dict["BB"])
        postgame_pitcher_entry.game_hbp = int(stat_row_dict["HBP"])
        if stat_row_dict["Inngs"] == "CG":
            postgame_pitcher_entry.game_cg = 1
        if stat_row_dict["Inngs"] == "SHO":
            postgame_pitcher_entry.game_cgso = 1
        # NOTE(review): a "SHO" row does not set game_cg, so a hitless
        # shutout is never flagged as a no-hitter here - confirm intended
        if postgame_pitcher_entry.game_cg == 1 and postgame_pitcher_entry.game_h == 0:
            postgame_pitcher_entry.game_no_hitter = 1
        postgame_pitcher_entry.actual_draftkings_points = Draftkings.get_pitcher_points(
            postgame_pitcher_entry)
        try:
            database_session.add(postgame_pitcher_entry)
            database_session.commit()
        except IntegrityError:
            database_session.rollback()
            print("Attempt to duplicate pitcher postgame results: %s %s %s %s" % (
                pitcher_entry.first_name, pitcher_entry.last_name,
                pregame_pitcher_entry.opposing_team,
                postgame_pitcher_entry.game_date))
Пример #6
0
# (column in the scraped stats, attribute suffix on the entry, conversion)
_PITCHING_STAT_FIELDS = (
    ("BF", "bf", int),
    ("IP", "ip", float),
    ("H", "h", int),
    ("HR", "hr", int),
    ("ER", "er", int),
    ("BB", "bb", int),
    ("SO", "so", int),
    ("W", "wins", int),
    ("L", "losses", int),
)


def _copy_pitching_stats(pregame_pitcher_entry, prefix, stats):
    """Copy the scraped pitching columns onto the entry's <prefix>_* attributes, in order."""
    for column, suffix, convert in _PITCHING_STAT_FIELDS:
        setattr(pregame_pitcher_entry, "%s_%s" % (prefix, suffix),
                convert(stats[column]))


def _print_pitcher_stats_error(error, pitcher_entry):
    """Print a stats lookup failure followed by the pitcher's name."""
    print(" ".join((str(error), "with", str(pitcher_entry.first_name),
                    str(pitcher_entry.last_name))))


def get_pitcher_stats(pitcher_id,
                      team,
                      opposing_team,
                      database_session,
                      game_date=None):
    """ Build the pregame entry of a pitcher from his career, recent and season stats and the opposing lineup
    A stat group whose table or row cannot be found is reported on stdout and its fields are left untouched.
    :param pitcher_id: primary key of the PitcherEntry; stored as rotowire_id on the returned entry
    :param team: team stored on the returned entry
    :param opposing_team: opposing team stored on the returned entry and used to select the opposing lineup
    :param database_session: session used to query the PitcherEntry and the PregameHitterGameEntry rows
    :param game_date: date of the game, today when None
    :return: a PregamePitcherGameEntry object (it is not added to the session here)
    :raises PitcherNotFound: if no PitcherEntry exists for pitcher_id
    """
    pregame_pitcher_entry = PregamePitcherGameEntry()
    pregame_pitcher_entry.rotowire_id = pitcher_id
    pregame_pitcher_entry.team = team
    pregame_pitcher_entry.opposing_team = opposing_team
    if game_date is None:
        game_date = date.today()
    pregame_pitcher_entry.game_date = game_date

    # Career stats
    pitcher_entry = database_session.query(PitcherEntry).get(pitcher_id)
    if pitcher_entry is None:
        raise PitcherNotFound(pitcher_id)

    stat_errors = (BaseballReference.TableNotFound,
                   BaseballReference.TableRowNotFound)
    # The same career page soup serves the career and the recent stats
    pitcher_career_soup = BaseballReference.get_pitcher_page_career_soup(
        pitcher_entry.baseball_reference_id)
    try:
        career_stats = BaseballReference.get_career_pitching_stats(
            pitcher_entry.baseball_reference_id, pitcher_career_soup)
        _copy_pitching_stats(pregame_pitcher_entry, "career", career_stats)
    except stat_errors as e:
        _print_pitcher_stats_error(e, pitcher_entry)

    # NOTE(review): this selects hitter entries whose own opposing_team
    # equals this pitcher's opposing_team - confirm that this is how the
    # opposing lineup is stored
    opposing_lineup = database_session.query(PregameHitterGameEntry).filter(
        PregameHitterGameEntry.game_date == game_date,
        PregameHitterGameEntry.opposing_team == opposing_team)
    for hitter in opposing_lineup:
        pregame_pitcher_entry.vs_h += hitter.vs_h
        pregame_pitcher_entry.vs_bb += hitter.vs_bb
        pregame_pitcher_entry.vs_so += hitter.vs_so
        pregame_pitcher_entry.vs_hr += hitter.vs_hr
        pregame_pitcher_entry.vs_bf += hitter.vs_pa
        # Approximate earned runs by the RBIs of opposing hitters
        pregame_pitcher_entry.vs_er += hitter.vs_rbi

    # Recent stats
    try:
        recent_stats = BaseballReference.get_recent_pitcher_stats(
            pitcher_entry.baseball_reference_id, pitcher_career_soup)
        _copy_pitching_stats(pregame_pitcher_entry, "recent", recent_stats)
    except stat_errors as e:
        _print_pitcher_stats_error(e, pitcher_entry)

    #Season stats
    try:
        season_stats = BaseballReference.get_season_pitcher_stats(
            pitcher_entry.baseball_reference_id)
        _copy_pitching_stats(pregame_pitcher_entry, "season", season_stats)
    except stat_errors as e:
        _print_pitcher_stats_error(e, pitcher_entry)

    return pregame_pitcher_entry
Пример #7
0
# Columns copied for the career, vs-hand, recent and season stat groups
_HITTING_SPLIT_COLUMNS = ("PA", "AB", "R", "H", "HR", "RBI", "SB", "CS", "BB",
                          "SO")
# Columns copied for the stats against the opposing pitcher
_HITTING_VS_PITCHER_COLUMNS = ("PA", "AB", "H", "HR", "RBI", "BB", "SO")


def _copy_hitting_stats(pregame_hitter_entry, prefix, stats, columns):
    """Store int(stats[column]) in <prefix>_<lower-cased column> for every column, in order."""
    for column in columns:
        setattr(pregame_hitter_entry, "%s_%s" % (prefix, column.lower()),
                int(stats[column]))


def _print_hitter_stats_error(error, hitter_entry):
    """Print a stats lookup failure followed by the hitter's name."""
    print(" ".join((str(error), "with", str(hitter_entry.first_name),
                    str(hitter_entry.last_name))))


def get_hitter_stats(batter_id, pitcher_id, team, pitcher_hand,
                     database_session):
    """ Build the pregame entry of a hitter from his career, vs hand, recent, season and vs pitcher stats
    A stat group whose table or row cannot be found is reported on stdout and its fields are left untouched.
    :param batter_id: primary key of the HitterEntry; stored as rotowire_id on the returned entry
    :param pitcher_id: primary key of the opposing PitcherEntry; stored as pitcher_id on the returned entry
    :param team: team stored on the returned entry
    :param pitcher_hand: a str representation of the hand the pitcher throws with ("L" or "R")
    :param database_session: session used to query the HitterEntry and the PitcherEntry
    :return: a PregameHitterGameEntry object (it is not added to the session here)
    :raises HitterNotFound: if no HitterEntry exists for batter_id
    :raises AssertionError: if pitcher_hand is neither "L" nor "R"
    """
    pregame_hitter_entry = PregameHitterGameEntry()
    pregame_hitter_entry.rotowire_id = batter_id
    pregame_hitter_entry.pitcher_id = pitcher_id
    pregame_hitter_entry.team = team

    # Career stats
    hitter_entry = database_session.query(HitterEntry).get(batter_id)
    if hitter_entry is None:
        raise HitterNotFound(batter_id)

    stat_errors = (BaseballReference.TableNotFound,
                   BaseballReference.TableRowNotFound)
    # The same career page soup serves the career, vs hand and recent stats
    hitter_career_soup = BaseballReference.get_hitter_page_career_soup(
        hitter_entry.baseball_reference_id)
    try:
        career_stats = BaseballReference.get_career_hitting_stats(
            hitter_entry.baseball_reference_id, hitter_career_soup)
        _copy_hitting_stats(pregame_hitter_entry, "career", career_stats,
                            _HITTING_SPLIT_COLUMNS)
    #TODO: add ColumnNotFound exception to BaseballReference
    except stat_errors as e:
        _print_hitter_stats_error(e, hitter_entry)

    # Vs hand of the opposing pitcher
    if pitcher_hand == "L":
        pitcher_hand_lr = BaseballReference.HandEnum.LHP
    elif pitcher_hand == "R":
        pitcher_hand_lr = BaseballReference.HandEnum.RHP
    else:
        # %s, not %i: pitcher_hand is a str and %i raises TypeError for it.
        # The error is raised explicitly because a bare assert is stripped
        # when Python runs with -O.
        message = "Invalid pitcher hand %s" % (pitcher_hand,)
        print(message)
        raise AssertionError(message)
    try:
        vs_hand_stats = BaseballReference.get_vs_hand_hitting_stats(
            hitter_entry.baseball_reference_id, pitcher_hand_lr,
            hitter_career_soup)
        _copy_hitting_stats(pregame_hitter_entry, "vs_hand", vs_hand_stats,
                            _HITTING_SPLIT_COLUMNS)
    except stat_errors as e:
        _print_hitter_stats_error(e, hitter_entry)

    # Recent stats
    try:
        recent_stats = BaseballReference.get_recent_hitting_stats(
            hitter_entry.baseball_reference_id, hitter_career_soup)
        _copy_hitting_stats(pregame_hitter_entry, "recent", recent_stats,
                            _HITTING_SPLIT_COLUMNS)
    except stat_errors as e:
        _print_hitter_stats_error(e, hitter_entry)

    #Season stats
    try:
        season_stats = BaseballReference.get_season_hitting_stats(
            hitter_entry.baseball_reference_id)
        _copy_hitting_stats(pregame_hitter_entry, "season", season_stats,
                            _HITTING_SPLIT_COLUMNS)
    except stat_errors as e:
        _print_hitter_stats_error(e, hitter_entry)

    # Career versus this pitcher
    pitcher_entry = database_session.query(PitcherEntry).get(
        pregame_hitter_entry.pitcher_id)
    # Couldn't find the pitcher, just continue and use default values
    if pitcher_entry is None:
        return pregame_hitter_entry

    try:
        vs_pitcher_stats = BaseballReference.get_vs_pitcher_stats(
            hitter_entry.baseball_reference_id,
            pitcher_entry.baseball_reference_id)
        _copy_hitting_stats(pregame_hitter_entry, "vs", vs_pitcher_stats,
                            _HITTING_VS_PITCHER_COLUMNS)
    except stat_errors as e:
        _print_hitter_stats_error(e, hitter_entry)

    return pregame_hitter_entry
Пример #8
0
def get_game_lineups(database_session):
    """ Mine the RotoWire daily lineups page and get the players' name, team, and RotoWire ID
    Note: longer names are abbreviated by RotoWire and need to be resolved by another source
    A GameEntry is also added to the database for every game that has both pitchers listed.
    :param database_session: session passed to get_hitter/get_pitcher and used to store each GameEntry
    :return: list of Game objects representing the lineups for the day
    """
    #TODO: add feature to look if it's going to rain
    lineup_soup = BeautifulSoupHelper.get_soup_from_url(DAILY_LINEUPS_URL)
    header_nodes = lineup_soup.findAll("div", {"class": TEAM_REGION_LABEL})
    games = list()
    for header_node in header_nodes:
        game_node = header_node.parent
        away_team_abbreviation = game_node.find("div", {"class": AWAY_TEAM_REGION_LABEL}).text.split()[0]
        home_team_abbreviation = game_node.find("div", {"class": HOME_TEAM_REGION_LABEL}).text.split()[0]
        game_main_soup = game_node.find("div", {"class": LINEUPS_CLASS_LABEL})

        away_team_lineup = [get_hitter(away_player, away_team_abbreviation, database_session)
                            for away_player in game_main_soup.findAll("div", {"class": AWAY_TEAM_PLAYER_LABEL})]
        home_team_lineup = [get_hitter(home_player, home_team_abbreviation, database_session)
                            for home_player in game_main_soup.findAll("div", {"class": HOME_TEAM_PLAYER_LABEL})]

        try:
            pitchers = game_node.find("div", PITCHERS_REGION_LABEL).findAll("div")
            away_team_pitcher = get_pitcher(pitchers[0], away_team_abbreviation, database_session)
            home_team_pitcher = get_pitcher(pitchers[1], home_team_abbreviation, database_session)
        # No pitchers present on page: region missing (AttributeError) or fewer than two nodes (IndexError)
        except (AttributeError, IndexError):
            print("Game between %s and %s is not valid." % (away_team_abbreviation, home_team_abbreviation))
            continue

        current_game = Game(away_team_lineup, away_team_pitcher, home_team_lineup, home_team_pitcher)

        # TODO: since they only release the ump data ~1 hour before the game, we'll have to make this robust later
        try:
            game_time = game_node.find("div", {"class": TIME_REGION_LABEL}).find("a").text.replace("ET", "").strip()
            # Convert the 12-hour clock text into a 24-hour "HH:MM" string
            game_time = datetime.strptime(game_time, '%I:%M %p').strftime("%H:%M")
            game_entry = GameEntry(date.today(), game_time, home_team_abbreviation, away_team_abbreviation)
            game_entry.wind_speed = get_wind_speed(game_node)
            game_entry.ump_ks_per_game = get_ump_ks_per_game(game_node)
            game_entry.ump_runs_per_game = get_ump_runs_per_game(game_node)
            game_entry.park_hitter_score, game_entry.park_pitcher_score = BaseballReference.get_team_info(team_dict[home_team_abbreviation])

            database_session.add(game_entry)
            database_session.commit()
        except IntegrityError:
            database_session.rollback()
            print("Warning: attempt to duplicate game entry: %s %s %s %s" % (str(home_team_abbreviation),
                                                                             str(away_team_abbreviation),
                                                                             str(game_entry.game_date),
                                                                             str(game_entry.game_time)))
        # Best effort: a failure to store the game entry must not drop the lineup itself, so it is only reported
        except Exception as e:
            print(e)

        if current_game.is_valid():
            games.append(current_game)
        else:
            print("Game between %s and %s is not valid." % (away_team_abbreviation, home_team_abbreviation))

    return games
Пример #9
0
def mine_yesterdays_results(database_session):
    """Store the actual results of every player with a pregame entry dated yesterday.

    For each PregameHitterGameEntry and PregamePitcherGameEntry whose game_date is yesterday, the player's
    game log row is requested from BaseballReference and saved as a PostgameHitterGameEntry or
    PostgamePitcherGameEntry together with the DraftKings points computed from it. When the lookup raises
    BaseballReference.TableRowNotFound the pregame entry is deleted instead.

    :param database_session: session used for all queries, inserts and deletes
    """
    yesterday = date.today() - timedelta(days=1)

    # Query the database for all hitter game entries from yesterday
    hitter_entries = database_session.query(PregameHitterGameEntry).filter(PregameHitterGameEntry.game_date == yesterday)
    for pregame_hitter_entry in hitter_entries:
        hitter_entry = database_session.query(HitterEntry).get(pregame_hitter_entry.rotowire_id)
        try:
            stat_row_dict = BaseballReference.get_yesterdays_hitting_game_log(hitter_entry.baseball_reference_id)
        except BaseballReference.TableRowNotFound:
            print("Player %s %s did not play yesterday. Deleting pregame entry %s %s" % (hitter_entry.first_name,
                                                                                         hitter_entry.last_name,
                                                                                         pregame_hitter_entry.game_date,
                                                                                         pregame_hitter_entry.opposing_team))
            database_session.delete(pregame_hitter_entry)
            database_session.commit()
            continue

        postgame_hitter_entry = PostgameHitterGameEntry()
        postgame_hitter_entry.rotowire_id = hitter_entry.rotowire_id
        postgame_hitter_entry.game_date = pregame_hitter_entry.game_date
        postgame_hitter_entry.game_h = int(stat_row_dict["H"])
        postgame_hitter_entry.game_bb = int(stat_row_dict["BB"])
        postgame_hitter_entry.game_hbp = int(stat_row_dict["HBP"])
        postgame_hitter_entry.game_r = int(stat_row_dict["R"])
        postgame_hitter_entry.game_sb = int(stat_row_dict["SB"])
        postgame_hitter_entry.game_hr = int(stat_row_dict["HR"])
        postgame_hitter_entry.game_rbi = int(stat_row_dict["RBI"])
        postgame_hitter_entry.game_2b = int(stat_row_dict["2B"])
        postgame_hitter_entry.game_3b = int(stat_row_dict["3B"])
        # Singles are the hits that were not doubles, triples or home runs
        postgame_hitter_entry.game_1b = postgame_hitter_entry.game_h - postgame_hitter_entry.game_2b - \
                                        postgame_hitter_entry.game_3b - postgame_hitter_entry.game_hr
        postgame_hitter_entry.actual_draftkings_points = Draftkings.get_hitter_points(postgame_hitter_entry)
        try:
            database_session.add(postgame_hitter_entry)
            database_session.commit()
        except IntegrityError:
            database_session.rollback()
            print("Attempt to duplicate hitter postgame results: %s %s %s %s" % (hitter_entry.first_name,
                                                                                 hitter_entry.last_name,
                                                                                 hitter_entry.team,
                                                                                 pregame_hitter_entry.game_date))

    # Query the database for all pitcher game entries from yesterday
    pitcher_entries = database_session.query(PregamePitcherGameEntry).filter(PregamePitcherGameEntry.game_date == yesterday)
    for pregame_pitcher_entry in pitcher_entries:
        pitcher_entry = database_session.query(PitcherEntry).get(pregame_pitcher_entry.rotowire_id)
        print("Mining yesterday for %s %s" % (pitcher_entry.first_name, pitcher_entry.last_name))
        try:
            stat_row_dict = BaseballReference.get_pitching_game_log(pitcher_entry.baseball_reference_id)
        except BaseballReference.TableRowNotFound:
            print("Player %s %s did not play yesterday. Deleting pregame entry %s %s" % (pitcher_entry.first_name,
                                                                                         pitcher_entry.last_name,
                                                                                         pregame_pitcher_entry.game_date,
                                                                                         pregame_pitcher_entry.opposing_team))
            database_session.delete(pregame_pitcher_entry)
            database_session.commit()
            continue

        postgame_pitcher_entry = PostgamePitcherGameEntry()
        postgame_pitcher_entry.rotowire_id = pitcher_entry.rotowire_id
        postgame_pitcher_entry.game_date = pregame_pitcher_entry.game_date
        postgame_pitcher_entry.game_ip = float(stat_row_dict["IP"])
        postgame_pitcher_entry.game_so = int(stat_row_dict["SO"])
        # startswith() instead of [0] so an empty decision does not raise IndexError
        if str(stat_row_dict["Dec"]).startswith("W"):
            postgame_pitcher_entry.game_wins = 1
        postgame_pitcher_entry.game_er = int(stat_row_dict["ER"])
        postgame_pitcher_entry.game_h = int(stat_row_dict["H"])
        postgame_pitcher_entry.game_bb = int(stat_row_dict["BB"])
        postgame_pitcher_entry.game_hbp = int(stat_row_dict["HBP"])
        if stat_row_dict["Inngs"] == "CG":
            postgame_pitcher_entry.game_cg = 1
        if stat_row_dict["Inngs"] == "SHO":
            postgame_pitcher_entry.game_cgso = 1
        # NOTE(review): a "SHO" row does not set game_cg, so a hitless shutout is never flagged as a
        # no-hitter here - confirm intended
        if postgame_pitcher_entry.game_cg == 1 and postgame_pitcher_entry.game_h == 0:
            postgame_pitcher_entry.game_no_hitter = 1
        postgame_pitcher_entry.actual_draftkings_points = Draftkings.get_pitcher_points(postgame_pitcher_entry)
        try:
            database_session.add(postgame_pitcher_entry)
            database_session.commit()
        except IntegrityError:
            database_session.rollback()
            print("Attempt to duplicate pitcher postgame results: %s %s %s %s" % (pitcher_entry.first_name,
                                                                                  pitcher_entry.last_name,
                                                                                  pregame_pitcher_entry.opposing_team,
                                                                                  postgame_pitcher_entry.game_date))
Пример #10
0
# (attribute suffix, key in the stats dict, converter) for every stat copied onto a pitcher entry.
# The order matches the original assignment order.
_PITCHER_STAT_FIELDS = (
    ("bf", "BF", int),
    ("ip", "IP", float),
    ("h", "H", int),
    ("hr", "HR", int),
    ("er", "ER", int),
    ("bb", "BB", int),
    ("so", "SO", int),
    ("wins", "W", int),
    ("losses", "L", int),
)


def _copy_pitcher_stats(entry, prefix, stats):
    """ Copy each field listed in _PITCHER_STAT_FIELDS from stats onto entry as <prefix>_<suffix>. """
    for suffix, column, convert in _PITCHER_STAT_FIELDS:
        setattr(entry, "%s_%s" % (prefix, suffix), convert(stats[column]))


def _report_pitcher_stats_error(error, pitcher_entry):
    """ Print the error together with the name of the pitcher whose stat lookup failed. """
    # A single pre-joined argument prints the same text as a print statement or a print() call
    print(" ".join([str(error), "with", str(pitcher_entry.first_name), str(pitcher_entry.last_name)]))


def get_pitcher_stats(pitcher_id, team, opposing_team, database_session, game_date=None):
    """ Build the pregame entry for a pitcher from career, recent, season and versus-lineup stats.

    A missing table or table row for any stat group is reported on stdout and that group is left
    at whatever values PregamePitcherGameEntry() starts with.
    :param pitcher_id: identifier used to look up the PitcherEntry; stored as the entry's rotowire_id
    :param team: stored on the entry as team
    :param opposing_team: stored on the entry and used to select the hitter entries that are summed
    :param database_session: session used to query PitcherEntry and PregameHitterGameEntry
    :param game_date: date stored on the entry and used in the hitter query; date.today() when None
    :return: the populated PregamePitcherGameEntry object
    :raises PitcherNotFound: if the session has no PitcherEntry for pitcher_id
    """
    if game_date is None:
        game_date = date.today()

    pregame_pitcher_entry = PregamePitcherGameEntry()
    pregame_pitcher_entry.rotowire_id = pitcher_id
    pregame_pitcher_entry.team = team
    pregame_pitcher_entry.opposing_team = opposing_team
    pregame_pitcher_entry.game_date = game_date

    pitcher_entry = database_session.query(PitcherEntry).get(pitcher_id)
    if pitcher_entry is None:
        raise PitcherNotFound(pitcher_id)
    baseball_reference_id = pitcher_entry.baseball_reference_id

    # The career page soup is fetched once and reused for the career and recent stat lookups
    pitcher_career_soup = BaseballReference.get_pitcher_page_career_soup(baseball_reference_id)

    # Career stats
    try:
        career_stats = BaseballReference.get_career_pitching_stats(baseball_reference_id, pitcher_career_soup)
        _copy_pitcher_stats(pregame_pitcher_entry, "career", career_stats)
    except (BaseballReference.TableNotFound, BaseballReference.TableRowNotFound) as e:
        _report_pitcher_stats_error(e, pitcher_entry)

    # Versus stats: sum the versus-pitcher numbers of the stored pregame hitter entries
    # NOTE(review): this selects hitter entries whose opposing_team equals this pitcher's opposing_team;
    # confirm that this is the lineup the pitcher actually faces.
    opposing_lineup = database_session.query(PregameHitterGameEntry).filter(
        PregameHitterGameEntry.game_date == game_date,
        PregameHitterGameEntry.opposing_team == opposing_team)
    for hitter in opposing_lineup:
        pregame_pitcher_entry.vs_h += hitter.vs_h
        pregame_pitcher_entry.vs_bb += hitter.vs_bb
        pregame_pitcher_entry.vs_so += hitter.vs_so
        pregame_pitcher_entry.vs_hr += hitter.vs_hr
        # Each hitter plate appearance counts as a batter faced
        pregame_pitcher_entry.vs_bf += hitter.vs_pa
        # Approximate earned runs by the RBIs of opposing hitters
        pregame_pitcher_entry.vs_er += hitter.vs_rbi

    # Recent stats
    try:
        recent_stats = BaseballReference.get_recent_pitcher_stats(baseball_reference_id, pitcher_career_soup)
        _copy_pitcher_stats(pregame_pitcher_entry, "recent", recent_stats)
    except (BaseballReference.TableNotFound, BaseballReference.TableRowNotFound) as e:
        _report_pitcher_stats_error(e, pitcher_entry)

    # Season stats
    try:
        season_stats = BaseballReference.get_season_pitcher_stats(baseball_reference_id)
        _copy_pitcher_stats(pregame_pitcher_entry, "season", season_stats)
    except (BaseballReference.TableNotFound, BaseballReference.TableRowNotFound) as e:
        _report_pitcher_stats_error(e, pitcher_entry)

    return pregame_pitcher_entry
Пример #11
0
# Keys copied from a hitting stats dict; the lower-cased key is the attribute suffix on the entry
_HITTER_STAT_COLUMNS = ("PA", "AB", "R", "H", "HR", "RBI", "SB", "CS", "BB", "SO")
# Subset copied for the hitter-versus-pitcher matchup
_HITTER_VS_PITCHER_COLUMNS = ("PA", "AB", "H", "HR", "RBI", "BB", "SO")


def _copy_hitter_stats(entry, prefix, stats, columns=_HITTER_STAT_COLUMNS):
    """ Copy each listed column from stats onto entry as the int attribute <prefix>_<column lower-cased>. """
    for column in columns:
        setattr(entry, "%s_%s" % (prefix, column.lower()), int(stats[column]))


def _report_hitter_stats_error(error, hitter_entry):
    """ Print the error together with the name of the hitter whose stat lookup failed. """
    # A single pre-joined argument prints the same text as a print statement or a print() call
    print(" ".join([str(error), "with", str(hitter_entry.first_name), str(hitter_entry.last_name)]))


def get_hitter_stats(batter_id, pitcher_id, team, pitcher_hand, database_session):
    """ Build the pregame entry for a hitter from career, vs-hand, recent, season and vs-pitcher stats.

    A missing table or table row for any stat group is reported on stdout and that group is left
    at whatever values PregameHitterGameEntry() starts with.
    :param batter_id: identifier used to look up the HitterEntry; stored as the entry's rotowire_id
    :param pitcher_id: identifier of the opposing pitcher; stored on the entry and used to look up the PitcherEntry
    :param team: stored on the entry as team
    :param pitcher_hand: a str representation of the hand the pitcher throws with ("L" or "R")
    :param database_session: session used to query HitterEntry and PitcherEntry
    :return: the populated PregameHitterGameEntry object
    :raises HitterNotFound: if the session has no HitterEntry for batter_id
    :raises AssertionError: if pitcher_hand is neither "L" nor "R"
    """
    pregame_hitter_entry = PregameHitterGameEntry()
    pregame_hitter_entry.rotowire_id = batter_id
    pregame_hitter_entry.pitcher_id = pitcher_id
    pregame_hitter_entry.team = team

    hitter_entry = database_session.query(HitterEntry).get(batter_id)
    if hitter_entry is None:
        raise HitterNotFound(batter_id)
    baseball_reference_id = hitter_entry.baseball_reference_id

    # The career page soup is fetched once and reused for the career, vs-hand and recent lookups
    hitter_career_soup = BaseballReference.get_hitter_page_career_soup(baseball_reference_id)

    # Career stats
    #TODO: add ColumnNotFound exception to BaseballReference
    try:
        career_stats = BaseballReference.get_career_hitting_stats(baseball_reference_id, hitter_career_soup)
        _copy_hitter_stats(pregame_hitter_entry, "career", career_stats)
    except (BaseballReference.TableNotFound, BaseballReference.TableRowNotFound) as e:
        _report_hitter_stats_error(e, hitter_entry)

    # Vs hand of the opposing pitcher
    if pitcher_hand == "L":
        pitcher_hand_lr = BaseballReference.HandEnum.LHP
    elif pitcher_hand == "R":
        pitcher_hand_lr = BaseballReference.HandEnum.RHP
    else:
        # %s, not %i: pitcher_hand is a str, and %i raised TypeError before the message could be printed
        message = "Invalid pitcher hand %s" % (pitcher_hand,)
        print(message)
        # Raised explicitly (same exception type as the former "assert 0") so the check survives "python -O"
        raise AssertionError(message)
    try:
        vs_hand_stats = BaseballReference.get_vs_hand_hitting_stats(baseball_reference_id, pitcher_hand_lr,
                                                                    hitter_career_soup)
        _copy_hitter_stats(pregame_hitter_entry, "vs_hand", vs_hand_stats)
    except (BaseballReference.TableNotFound, BaseballReference.TableRowNotFound) as e:
        _report_hitter_stats_error(e, hitter_entry)

    # Recent stats
    try:
        recent_stats = BaseballReference.get_recent_hitting_stats(baseball_reference_id, hitter_career_soup)
        _copy_hitter_stats(pregame_hitter_entry, "recent", recent_stats)
    except (BaseballReference.TableNotFound, BaseballReference.TableRowNotFound) as e:
        _report_hitter_stats_error(e, hitter_entry)

    # Season stats
    try:
        season_stats = BaseballReference.get_season_hitting_stats(baseball_reference_id)
        _copy_hitter_stats(pregame_hitter_entry, "season", season_stats)
    except (BaseballReference.TableNotFound, BaseballReference.TableRowNotFound) as e:
        _report_hitter_stats_error(e, hitter_entry)

    # Career versus this pitcher
    pitcher_entry = database_session.query(PitcherEntry).get(pregame_hitter_entry.pitcher_id)
    # Couldn't find the pitcher, just return and keep the default vs-pitcher values
    if pitcher_entry is None:
        return pregame_hitter_entry

    try:
        vs_pitcher_stats = BaseballReference.get_vs_pitcher_stats(baseball_reference_id,
                                                                  pitcher_entry.baseball_reference_id)
        _copy_hitter_stats(pregame_hitter_entry, "vs", vs_pitcher_stats, _HITTER_VS_PITCHER_COLUMNS)
    except (BaseballReference.TableNotFound, BaseballReference.TableRowNotFound,
            BaseballReference.DidNotFacePitcher) as e:
        _report_hitter_stats_error(e, hitter_entry)

    return pregame_hitter_entry