def scrape_games(games, if_scrape_shifts, data_format='csv'):
    """
    Scrape the play-by-play (and optionally the shifts) for a list of games.

    When csv is requested the results are handed to ``to_csv`` under a random
    numeric name; otherwise they are collected and returned as a Json string.

    :param games: list of game_ids
    :param if_scrape_shifts: Boolean indicating whether to also scrape shifts
    :param data_format: format you want data in - csv or json (csv is default)

    :return: Json string, or None when nothing was collected for json
             (always the case when csv is chosen)
    """
    check_data_format(data_format)

    # Build the list of game_id's and dates, then scrape pbp and shifts
    dated_games = json_schedule.get_dates(games)
    pbp_df, shifts_df = scrape_list_of_games(dated_games, if_scrape_shifts)

    scraped = {}  # Only filled when json output was requested
    if data_format.lower() == 'csv':
        # File label is a random integer between 1 and 101 (both inclusive)
        to_csv(str(random.randint(1, 101)), pbp_df, shifts_df)
    else:
        for label, frame in (('pbp', pbp_df), ('shifts', shifts_df)):
            if frame is not None:
                scraped[label] = frame.to_dict('records')

    # Print all errors associated with scrape call
    game_scraper.print_errors()

    return json.dumps(scraped) if scraped else None
def scrape_seasons(seasons, if_scrape_shifts, data_format='csv', preseason=False):
    """
    Given list of seasons it scrapes all the seasons

    Each season is scraped from September 1st of the given year up to July 1st
    of the following year. For csv, one set of files is written per season
    (labelled ``<season><season + 1>``); for json, the records are grouped
    under 'pbp' and 'shifts' and keyed by the season.

    :param seasons: list of seasons - the starting year of each, given as a
                    number (``season + 1`` is computed for the end date)
    :param if_scrape_shifts: Boolean indicating whether to also scrape shifts
    :param data_format: format you want data in - csv or json (csv is default)
    :param preseason: Boolean indicating whether include preseason games (default if False)

    :return: Json string when json is chosen, None when csv is chosen
    """
    check_data_format(data_format)
    wants_csv = data_format.lower() == 'csv'

    # Holds json of data if choose to return that
    json_dfs = {'pbp': dict(), 'shifts': dict()}

    for season in seasons:
        from_date = '-'.join([str(season), '9', '1'])
        to_date = '-'.join([str(season + 1), '7', '1'])

        games = json_schedule.scrape_schedule(from_date, to_date, preseason)
        pbp_df, shifts_df = scrape_list_of_games(games, if_scrape_shifts)

        if wants_csv:
            to_csv(str(season) + str(season + 1), pbp_df, shifts_df)
        else:
            if pbp_df is not None:
                json_dfs['pbp'][str(season)] = pbp_df.to_dict('records')
            if shifts_df is not None:
                json_dfs['shifts'][str(season)] = shifts_df.to_dict('records')

    # Print all errors associated with scrape call
    game_scraper.print_errors()

    # json_dfs always contains the 'pbp' and 'shifts' keys, so checking whether
    # it is empty cannot tell us which format was chosen (it used to make the
    # csv path return an empty Json skeleton). Decide on the format itself.
    if wants_csv:
        return None

    return json.dumps(json_dfs)
def scrape_date_range(from_date, to_date, if_scrape_shifts, data_format='csv', preseason=False):
    """
    Scrape every game scheduled between two dates.

    When csv is requested the results are handed to ``to_csv`` labelled
    ``<from_date>--<to_date>``; otherwise they are returned as a Json string.

    :param from_date: date you want to scrape from
    :param to_date: date you want to scrape to
    :param if_scrape_shifts: Boolean indicating whether to also scrape shifts
    :param data_format: format you want data in - csv or json (csv is default)
    :param preseason: Boolean indicating whether include preseason games (default if False)

    :return: Json string, or None when nothing was collected for json
             (always the case when csv is chosen)
    """
    # Validate the arguments before doing any scraping
    check_data_format(data_format)
    check_valid_dates(from_date, to_date)

    schedule = json_schedule.scrape_schedule(from_date, to_date, preseason)
    pbp_df, shifts_df = scrape_list_of_games(schedule, if_scrape_shifts)

    if data_format.lower() == 'csv':
        to_csv(from_date + '--' + to_date, pbp_df, shifts_df)
        payload = {}
    else:
        # Keep only the frames that were actually produced
        payload = {
            label: frame.to_dict('records')
            for label, frame in (('pbp', pbp_df), ('shifts', shifts_df))
            if frame is not None
        }

    # Print all errors associated with scrape call
    game_scraper.print_errors()

    # An empty payload means csv was chosen or there was nothing to return
    if not payload:
        return None

    return json.dumps(payload)