Exemplo n.º 1
0
def get_espn_game_id(date, home_team, away_team):
    """
    Scrapes the day's ESPN scoreboard and gets the id for the given game
    Ex: http://www.espn.com/nhl/scoreboard?date=20161024

    :param date: date of the game; any '-' is stripped before building the
                 url, so '2016-10-24' and '20161024' request the same page
    :param home_team: home team, matched against the entries from get_teams
    :param away_team: away team, matched against the entries from get_teams

    :return: game id of the first game containing either team, or None when
             no game on the scoreboard matches
    :raises Exception: if get_url returned a falsy response for the scoreboard
    """
    url = 'http://www.espn.com/nhl/scoreboard?date={}'.format(
        date.replace('-', ''))
    response = get_url(url)
    time.sleep(1)

    # Without the scoreboard page there is nothing to look the game up in
    if not response:
        raise Exception(
            'Could not retrieve ESPN scoreboard for {}'.format(date))

    game_ids = get_game_ids(response)
    games = get_teams(response)

    # assumes game_ids and games are index-aligned (same order on the page)
    for index, teams in enumerate(games):
        if home_team in teams or away_team in teams:
            return game_ids[index]

    # No game on that date involves either team
    return None
Exemplo n.º 2
0
def parse_player(player_list, player):
    """
    Builds a flat dict of bio and draft info for a single player

    :param player_list: players from the raw json, indexable by player
    :param player: key (or index) of the player inside player_list
    :return: dict with Player_Id and Name, every optional bio attribute that
             is present in the json, and the draft fields (Draft_Year,
             Draft_Team, Round, Pick, Overall) when the player's html page
             lists draft info
    """
    info = player_list[player]

    players = dict()
    players["Player_Id"] = info["id"]
    players["Name"] = fix_name(info["fullName"].upper())

    # Attributes are often missing from the json, so each one is copied only
    # when present. currentTeam, primaryNumber and currentAge are
    # deliberately not copied.
    if 'primaryPosition' in info:
        players['Pos'] = info['primaryPosition']['abbreviation']

    optional_fields = (
        ('shootsCatches', 'Shoots'),
        ('birthDate', 'Birth_Date'),
        ('birthCity', 'Birth_City'),
        ('birthStateProvince', 'Birth_Region'),
        ('birthCountry', 'Birth_Country'),
        ('nationality', 'Nationality'),
        ('height', 'Height'),
        ('weight', 'Weight'),
    )
    for json_key, column in optional_fields:
        if json_key in info:
            players[column] = info[json_key]

    # get draft info from player html page
    url = 'https://www.nhl.com/player/{}-{}-{}'.format(
        info["firstName"], info["lastName"], info["id"])
    html = get_url(url)
    time.sleep(1)
    soup = BeautifulSoup(html.content, 'html.parser')

    # find bio section and split its text into whitespace-separated tokens;
    # a page without a bio section is treated like a page without draft info
    bio_sections = soup.find_all('div', {'class': 'player-overview__bio'})
    bio = bio_sections[0].get_text().split() if bio_sections else []

    try:
        start = bio.index('Draft:')  # ValueError when there is no draft entry
        draft = bio[start:start + 9]  # 'Draft:' plus the 8 tokens after it
        players['Draft_Year'] = int(draft[1])
        players['Draft_Team'] = draft[2].strip(',')
        players['Round'] = int(re.findall(r"\d+", draft[3])[0])
        players['Pick'] = int(re.findall(r"\d+", draft[5])[0])
        players['Overall'] = int(re.findall(r"\d+", draft[7])[0])
    except (ValueError, IndexError):
        # player is undrafted (or the draft line is not in the expected
        # layout); the remaining draft fields are simply left out
        pass

    return players
Exemplo n.º 3
0
def parse_player(player_list, player):
    """
    Builds a flat dict of bio and draft info for a single player and records
    the player's id in old_players (a list defined outside this function)

    :param player_list: players from the raw json, indexable by player
    :param player: key (or index) of the player inside player_list
    :return: dict with Player_Id and Name, every optional bio attribute that
             is present in the json, and the draft fields (Draft_Year,
             Draft_Team, Draft_Round, Draft_Pick, Draft_Overall) when the
             player's html page lists draft info
    """
    info = player_list[player]

    players = dict()
    players['Player_Id'] = info['id']
    players['Name'] = fix_name(info['fullName'].upper())

    # often attributes are missing so we need to check first
    if 'primaryPosition' in info:
        players['Pos'] = info['primaryPosition']['abbreviation']

    optional_fields = (
        ('shootsCatches', 'Shoots'),
        ('birthDate', 'Birth_Date'),
        ('birthCity', 'Birth_City'),
        ('birthStateProvince', 'Birth_Region'),
        ('birthCountry', 'Birth_Country'),
        ('nationality', 'Nationality'),
        ('height', 'Height'),
        ('weight', 'Weight'),
    )
    for json_key, column in optional_fields:
        if json_key in info:
            players[column] = info[json_key]

    # get draft info from player html page as it is not included in the json
    url = 'https://www.nhl.com/player/{}-{}-{}'.format(
        info['firstName'], info['lastName'], info['id'])
    html = get_url(url)
    time.sleep(1)
    soup = BeautifulSoup(html.content, 'html.parser')

    # find bio section and split its text into whitespace-separated tokens;
    # a page without a bio section is treated like a page without draft info
    bio_sections = soup.find_all('div', {'class': 'player-overview__bio'})
    bio = bio_sections[0].get_text().split() if bio_sections else []

    try:
        start = bio.index('Draft:')  # ValueError when there is no draft entry
        draft = bio[start:start + 9]  # 'Draft:' plus the 8 tokens after it
        players['Draft_Year'] = int(draft[1])
        players['Draft_Team'] = fix_team(draft[2].strip(','))
        players['Draft_Round'] = int(re.findall(r"\d+", draft[3])[0])
        players['Draft_Pick'] = int(re.findall(r"\d+", draft[5])[0])
        players['Draft_Overall'] = int(re.findall(r"\d+", draft[7])[0])
    except (ValueError, IndexError, KeyError):
        # if player is undrafted this section does not exist. not an error so
        # we just skip this step.
        # NOTE(review): KeyError is caught on the assumption that fix_team
        # does a dict lookup for unknown team codes - confirm against fix_team
        pass

    old_players.append(info['id'])

    return players
Exemplo n.º 4
0
def get_shifts(game_id):
    """
    Fetches the home (TH) and away (TV) html shift reports for a game
    Ex: http://www.nhl.com/scores/htmlreports/20162017/TV020971.HTM
    :param game_id: the game; first 4 characters are the season's starting
                    year, the rest is the game number (e.g. 2016020971)
    :return: tuple (home, away) with whatever get_url returned for each report
    """
    gid = str(game_id)
    season_start = gid[:4]
    season_end = int(season_start) + 1
    game_number = gid[4:]

    home_url = 'http://www.nhl.com/scores/htmlreports/{}{}/TH{}.HTM'.format(
        season_start, season_end, game_number)
    away_url = 'http://www.nhl.com/scores/htmlreports/{}{}/TV{}.HTM'.format(
        season_start, season_end, game_number)

    # fetch home first, then away, pausing one second after each request
    reports = []
    for report_url in (home_url, away_url):
        reports.append(get_url(report_url))
        time.sleep(1)

    home, away = reports
    return home, away
Exemplo n.º 5
0
def get_html(game_id):
    """
    Fetches the Playing Roster (RO) html report for a game
    Ex: http://www.nhl.com/scores/htmlreports/20162017/RO020475.HTM
    :param game_id: the game, e.g. 2016020475 (season start year followed by
                    the game number)
    :return: whatever get_url returns for the roster report url
    """
    gid = str(game_id)
    season_start, game_number = gid[:4], gid[4:]
    season_end = int(season_start) + 1

    roster_url = 'http://www.nhl.com/scores/htmlreports/{}{}/RO{}.HTM'.format(
        season_start, season_end, game_number)
    return get_url(roster_url)
Exemplo n.º 6
0
def get_shifts(game_id):
    """
    Fetches the json shift charts for a game and hands them to parse_json
    Ex: http://www.nhl.com/stats/rest/shiftcharts?cayenneExp=gameId=2010020001
    :param game_id: the game
    :return: result of parse_json for the decoded shift json and game_id
    """
    shifts_url = 'http://www.nhl.com/stats/rest/shiftcharts?cayenneExp=gameId={}'.format(game_id)

    response = get_url(shifts_url)
    time.sleep(1)

    decoded = json.loads(response.text)
    return parse_json(decoded, game_id)
Exemplo n.º 7
0
def get_pbp(game_id):
    """
    Fetches the play by play (PL) html report for a game
    Ex: http://www.nhl.com/scores/htmlreports/20162017/PL020475.HTM
    :param game_id: the game, e.g. 2016020475 (season start year followed by
                    the game number)
    :return: whatever get_url returns for the play by play report url
    """
    gid = str(game_id)
    season_start, game_number = gid[:4], gid[4:]
    season_end = int(season_start) + 1

    pbp_url = 'http://www.nhl.com/scores/htmlreports/{}{}/PL{}.HTM'.format(
        season_start, season_end, game_number)

    # pause one second before the request is made
    time.sleep(1)
    return get_url(pbp_url)
Exemplo n.º 8
0
def get_schedule(date_from, date_to):
    """
    Fetches the NHL schedule for every game in the given date range
    e.g. https://statsapi.web.nhl.com/api/v1/schedule?startDate=2010-10-03&endDate=2011-06-20
    :param date_from: first date of the range (used as startDate)
    :param date_to: last date of the range (used as endDate)
    :return: decoded json of the schedule response
    """
    schedule_url = 'https://statsapi.web.nhl.com/api/v1/schedule?startDate={a}&endDate={b}'.format(
        a=date_from, b=date_to)

    response = get_url(schedule_url)
    time.sleep(1)

    return json.loads(response.text)
Exemplo n.º 9
0
def scrape_roster(game_id):
    """
    For a given game scrapes the playing roster and the game summary reports

    :param game_id: id for game (season start year followed by game number)
    :return: tuple (players, head_coaches, officials, goalies, three_stars);
             the first three come from the roster report, the last two from
             the game summary report
    :raises Exception: if either report cannot be fetched or parsed; the
                       original error is attached as the exception's cause
    """

    # Step 1: fetch the playing roster report
    try:
        html = get_html(game_id)
        time.sleep(1)
    except Exception as e:
        message = 'Roster for game {} is not there'.format(game_id)
        print(message, e)
        raise Exception(message) from e

    # Step 2: parse players, coaches and officials out of the roster report
    try:
        soup = BeautifulSoup(html.content, 'html.parser')
        players = get_players(soup)
        head_coaches = get_coaches(soup)
        officials = get_officials(soup)
    except Exception as e:
        message = 'Problem with playing roster for game {}'.format(game_id)
        print(message, e)
        raise Exception(message) from e

    # Step 3: fetch the game summary (GS) report
    try:
        game_id = str(game_id)
        url_game_summary = 'http://www.nhl.com/scores/htmlreports/{}{}/GS{}.HTM'.format(
            game_id[:4],
            int(game_id[:4]) + 1, game_id[4:])
        html_game_summary = get_url(url_game_summary)
        time.sleep(1)
    except Exception as e:
        message = 'Game Summary for game {} is not there'.format(game_id)
        print(message, e)
        raise Exception(message) from e

    # Step 4: parse goalies and the three stars out of the game summary
    try:
        soup_game_summary = BeautifulSoup(html_game_summary.content,
                                          'html.parser')
        goalies = get_goalies(soup_game_summary)
        three_stars = get_stars(soup_game_summary)
    except Exception as e:
        message = 'Problem with game summary for game {}'.format(game_id)
        print(message, e)
        raise Exception(message) from e

    return players, head_coaches, officials, goalies, three_stars
Exemplo n.º 10
0
def get_pbp(game_id):
    """
    Fetches the live feed json for a game
    Ex: http://statsapi.web.nhl.com/api/v1/game/2016020475/feed/live
    :param game_id: the game
    :return: decoded json of the game, or None when the request raised an
             HTTPError
    """
    feed_url = 'http://statsapi.web.nhl.com/api/v1/game/{}/feed/live'.format(
        game_id)

    try:
        response = get_url(feed_url)
    except requests.exceptions.HTTPError as err:
        print('Json pbp for game {} is not there'.format(game_id), err)
        return None

    # only reached when the request did not raise an HTTPError
    time.sleep(1)
    return json.loads(response.text)
Exemplo n.º 11
0
def get_espn(date, home_team, away_team):
    """
    Gets the ESPN pbp feed
    Ex: http://www.espn.com/nhl/gamecast/data/masterFeed?lang=en&isAll=true&gameId=400885300

    :param date: date of the game
    :param home_team: home team (upper-cased before the game id lookup)
    :param away_team: away team (upper-cased before the game id lookup)

    :return: response returned by get_url for the feed url
    :raises Exception: if no ESPN game id is found for the game, or if get_url
                       returns None for the feed
    """
    game_id = get_espn_game_id(date, home_team.upper(), away_team.upper())

    # Without an id the url would be built with gameId=None, so fail early
    if game_id is None:
        raise Exception('No ESPN game id found for {} vs {} on {}'.format(
            home_team, away_team, date))

    url = 'http://www.espn.com/nhl/gamecast/data/masterFeed?lang=en&isAll=true&gameId={}'.format(
        game_id)
    response = get_url(url)

    if response is None:
        raise Exception(
            'Could not retrieve ESPN pbp feed for game {}'.format(game_id))

    time.sleep(1)
    return response