Пример #1
0
def scrape_schedule(date_from, date_to, preseason=False, not_over=False):
    """
    Collect the games played between two dates from the raw schedule Json.
    The Json is fetched with chunk_schedule_calls.

    :param date_from: scrape from this date
    :param date_to: scrape until this date
    :param preseason: Boolean indicating whether to keep games whose number is below 20000 
                      (presumably the preseason games). Default is False.
    :param not_over: Boolean indicating whether to keep games that aren't finished. When False
                     only games whose detailed state is 'Final' are kept.

    :return: list of dicts, one per game kept, holding the game id, date, start time, venue,
             both teams, both scores and the status
    """
    games = []

    for chunk in chunk_schedule_calls(date_from, date_to):
        for day in chunk:
            for game in day['games']:
                # Games that aren't final are only kept when explicitly asked for
                if game['status']['detailedState'] != 'Final' and not not_over:
                    continue

                # Everything after the first five characters of the game id
                game_number = int(str(game['gamePk'])[5:])
                if game_number >= 40000:
                    continue
                if game_number < 20000 and not preseason:
                    continue

                game_day = day['date']
                # The last character of the timestamp is dropped before parsing
                start_time = datetime.strptime(game['gameDate'][:-1], "%Y-%m-%dT%H:%M:%S")
                venue_name = game['venue'].get('name')

                home_side = game['teams']['home']
                home_team = shared.get_team(home_side['team']['name'].upper())
                away_side = game['teams']['away']
                away_team = shared.get_team(away_side['team']['name'].upper())

                games.append({
                    "game_id": game['gamePk'],
                    "date": game_day,
                    "start_time": start_time,
                    "venue": venue_name,
                    "home_team": home_team,
                    "away_team": away_team,
                    "home_score": home_side.get("score"),
                    "away_score": away_side.get("score"),
                    "status": game["status"]["abstractGameState"],
                })

    return games
Пример #2
0
def get_teams(pbp_json):
    """
    Pull both teams of the game out of the play by play json

    :param pbp_json: raw play by play json

    :return: dict with the home team under 'Home' and the away team under 'Away'
    """
    teams = pbp_json['gameData']['teams']

    home = shared.get_team(teams['home']['name'])
    away = shared.get_team(teams['away']['name'])

    return {'Home': home, 'Away': away}
Пример #3
0
def get_teams(pbp_json):
    """
    Pull both teams of the game out of the play by play json. 
    Each name is upper-cased before being handed to shared.get_team.

    :param pbp_json: raw play by play json

    :return: dict with the home team under "Home" and the away team under "Away"
    """
    teams = pbp_json["gameData"]["teams"]

    return {
        key: shared.get_team(teams[side]["name"].upper())
        for key, side in (("Home", "home"), ("Away", "away"))
    }
Пример #4
0
def get_players_json(game_json):
    """
    Return dict of players for that game, grouped by venue ("home" / "away")

    :param game_json: raw json of the game. The rosters are read from the boxscore 
                      section and the last names from the gameData players section.

    :return: {"home"/"away" -> {player name -> {"id": ..., "last_name": ...}}}
    """
    players = {"home": {}, "away": {}}
    boxscore_teams = game_json['liveData']['boxscore']['teams']

    for venue in players:
        team_players = boxscore_teams[venue]['players']

        for id_key, entry in team_players.items():
            person = entry['person']
            # Names are normalized so they can be matched against the other sources
            player_name = shared.fix_name(person['fullName'])

            players[venue][player_name] = {
                "id": person['id'],
                # The last name is stored upper-cased
                "last_name": game_json['gameData']['players'][id_key]['lastName'].upper(),
            }

    return players
Пример #5
0
def scrape_schedule(date_from, date_to, preseason=False, live=False):
    """
    Collect the games played between two dates from the raw schedule Json.
    The Json is fetched with get_schedule.

    :param date_from: scrape from this date
    :param date_to: scrape until this date
    :param preseason: Boolean indicating whether to keep games whose number is below 20000 
                      (presumably the preseason games). Default is False.
    :param live: Boolean indicating whether we are scraping live games. When False only games 
                 whose detailed state is "Final" are kept.

    :return: list of dicts, one per game kept, holding the game id, date, start time, 
             both teams and the status
    """
    games = []

    for day in get_schedule(date_from, date_to)["dates"]:
        for game in day["games"]:
            # Games that aren't final are only kept when scraping live
            if game["status"]["detailedState"] != "Final" and not live:
                continue

            # Everything after the first five characters of the game id
            game_number = int(str(game["gamePk"])[5:])
            if game_number >= 40000:
                continue
            if game_number < 20000 and not preseason:
                continue

            # The last character of the timestamp is dropped before parsing
            start_time = datetime.datetime.strptime(
                game["gameDate"][:-1], "%Y-%m-%dT%H:%M:%S")

            games.append(dict(
                game_id=game["gamePk"],
                date=day["date"],
                start_time=start_time,
                home_team=shared.get_team(
                    game["teams"]["home"]["team"]["name"].upper()),
                away_team=shared.get_team(
                    game["teams"]["away"]["team"]["name"].upper()),
                status=game["status"]["abstractGameState"],
            ))

    return games
Пример #6
0
def get_teams(response):
    """
    Extract the teams for a date from the doc, grouped per game
    
    :param response: doc
    
    :return: list of lists - the teams taken two at a time, in the order found in the doc
    """
    soup = BeautifulSoup(response, 'lxml')

    # Every non-empty <td class="team"> cell holds one team name
    teams = []
    for cell in soup.findAll('td', {'class': "team"}):
        text = cell.get_text()
        if text != '':
            teams.append(shared.get_team(text.upper()))

    # Consecutive teams are paired up to make a game
    games = []
    for start in range(0, len(teams), 2):
        games.append(teams[start:start + 2])

    return games
Пример #7
0
def analyze_shifts(shift, name, team, home_team, player_ids):
    """
    Convert a single raw shift of a player into a dict describing that shift.
    Prior to this each player (in a dictionary) has a list with each entry being a shift.

    :param shift: info on shift - a sequence where [1] is the period, [2] the start, [3] the end 
                  and [4] the duration. Only the part before the first "/" of each time is used.
    :param name: player name
    :param team: given team
    :param home_team: home team for given game
    :param player_ids: dict with info on players, read as player_ids["Home" or "Away"][NAME]["id"]
    
    :return: dict with info for shift
    """
    player = name.upper()

    shifts = {
        "Player": player,
        # Overtime is recorded as the fourth period
        "Period": "4" if shift[1] == "OT" else shift[1],
        "Team": shared.get_team(team.strip(" ")),
        "Start": shared.convert_to_seconds(shift[2].split("/")[0]),
        "Duration": shared.convert_to_seconds(shift[4].split("/")[0]),
    }

    # The end time is sometimes malformed. When it holds no digits at all it is
    # derived from the start and the duration instead.
    end = shift[3].split("/")[0]
    if re.search(r"\d", end):
        shifts["End"] = shared.convert_to_seconds(end)
    else:
        shifts["End"] = shifts["Start"] + shifts["Duration"]

    # A player missing from the roster info gets an empty id rather than failing
    side = "Home" if home_team == team else "Away"
    try:
        shifts["Player_Id"] = player_ids[side][player]["id"]
    except KeyError:
        shifts["Player_Id"] = ""

    return shifts
Пример #8
0
def analyze_shifts(shift, name, team, home_team, player_ids):
    """
    Convert a single raw shift of a player into a dict describing that shift.
    Prior to this each player (in a dictionary) has a list with each entry being a shift.

    :param shift: info on shift - a sequence where [1] is the period, [2] the start, [3] the end 
                  and [4] the duration. Only the part before the first '/' of each time is used.
    :param name: player name
    :param team: given team
    :param home_team: home team for given game
    :param player_ids: dict with info on players, read as player_ids['Home' or 'Away'][NAME]['id']
    
    :return: dict with info for shift
    """
    player = name.upper()

    shifts = {
        'Player': player,
        # Overtime is recorded as the fourth period
        'Period': '4' if shift[1] == 'OT' else shift[1],
        'Team': shared.get_team(team.strip(' ')),
        'Start': shared.convert_to_seconds(shift[2].split('/')[0]),
        'Duration': shared.convert_to_seconds(shift[4].split('/')[0]),
    }

    # The end time is sometimes malformed. When it holds no digits at all it is
    # derived from the start and the duration instead.
    end = shift[3].split('/')[0]
    if re.search(r'\d', end):
        shifts['End'] = shared.convert_to_seconds(end)
    else:
        shifts['End'] = shifts['Start'] + shifts['Duration']

    # A player missing from the roster info gets an empty id rather than failing
    side = 'Home' if home_team == team else 'Away'
    try:
        shifts['Player_Id'] = player_ids[side][player]['id']
    except KeyError:
        shifts['Player_Id'] = ''

    return shifts