def scrape_list_of_games(games, if_scrape_shifts):
    """
    Given a list of game_id's (and a date for each game) it scrapes them.

    Each game is scraped with ``game_scraper.scrape_game``. Any pbp or shifts
    result that comes back as None is left out of the combined DataFrame.

    :param games: list of [game_id, date]
    :param if_scrape_shifts: Boolean indicating whether to also scrape shifts

    :return: Tuple of (pbp DataFrame, shifts DataFrame). The shifts entry is
             None when shifts weren't requested or when no shifts could be
             scraped for any game. Both entries are None when no pbp was
             scraped for any game.
    """
    pbp_dfs = []
    shifts_dfs = []

    for game in games:
        pbp_df, shifts_df = game_scraper.scrape_game(str(game[0]), game[1], if_scrape_shifts)
        if pbp_df is not None:
            pbp_dfs.append(pbp_df)
        if shifts_df is not None:
            shifts_dfs.append(shifts_df)

    # Without any pbp there is nothing to return
    if not pbp_dfs:
        return None, None

    pbp_df = pd.concat(pbp_dfs, ignore_index=True)

    # The return value is discarded; check_goalie is called once per row purely
    # for whatever it does internally (presumably reporting goalie problems --
    # confirm against game_scraper).
    pbp_df.apply(game_scraper.check_goalie, axis=1)

    # pd.concat raises ValueError on an empty list, so only combine the shifts
    # when at least one game actually produced them.
    if if_scrape_shifts and shifts_dfs:
        shifts_df = pd.concat(shifts_dfs, ignore_index=True)
    else:
        shifts_df = None

    return pbp_df, shifts_df
def test_scrape_game(pbp_columns, shifts_columns):
    """
    Verify that a single game is scraped correctly both with and without shifts.

    For each case this checks:
        1. The returned objects are DataFrames (shifts is None when not requested)
        2. The row counts match the expected values
        3. The column names match the expected lists

    :param pbp_columns: expected list of pbp column names
    :param shifts_columns: expected list of shifts column names
    """
    # Case 1: shifts are not requested
    plays, shift_data = game_scraper.scrape_game("2016020475", "2016-12-18", False)

    assert isinstance(plays, pd.DataFrame)
    assert shift_data is None
    assert len(plays.index) == 326
    assert plays.columns.tolist() == pbp_columns

    # Case 2: shifts are requested
    plays, shift_data = game_scraper.scrape_game("2007020222", "2007-11-08", True)

    assert isinstance(plays, pd.DataFrame)
    assert isinstance(shift_data, pd.DataFrame)
    assert len(plays.index) == 248
    assert len(shift_data.index) == 726
    assert plays.columns.tolist() == pbp_columns
    assert shift_data.columns.tolist() == shifts_columns