def scrape_schedule(date_from, date_to, preseason=False, not_over=False):
    """
    Call chunk_schedule_calls for the date range and scrape the raw schedule json it returns.

    :param date_from: scrape from this date
    :param date_to: scrape until this date
    :param preseason: Boolean indicating whether to include preseason games (default is False)
    :param not_over: Boolean indicating whether we also scrape games that are not finished. When
                     True the requirement that the game's detailed state is 'Final' is relaxed.

    :return: list with one dict of info per game kept
    """
    kept_games = []

    for chunk in chunk_schedule_calls(date_from, date_to):
        for day in chunk:
            for game in day['games']:
                # Only finished games are kept unless the caller asked for unfinished ones too
                is_final = game['status']['detailedState'] == 'Final'
                if not (is_final or not_over):
                    continue

                # Everything after the first five characters of the game id
                season_game_id = int(str(game['gamePk'])[5:])

                # Ids of 40000 and up are never kept; ids under 20000 only when preseason is set
                if season_game_id >= 40000:
                    continue
                if season_game_id < 20000 and not preseason:
                    continue

                game_info = {
                    "game_id": game['gamePk'],
                    "date": day['date'],
                    # The last character of the timestamp is dropped before parsing
                    "start_time": datetime.strptime(game['gameDate'][:-1], "%Y-%m-%dT%H:%M:%S"),
                    "venue": game['venue'].get('name'),
                }

                teams = game['teams']
                game_info["home_team"] = shared.get_team(teams['home']['team']['name'].upper())
                game_info["away_team"] = shared.get_team(teams['away']['team']['name'].upper())
                game_info["home_score"] = teams['home'].get("score")
                game_info["away_score"] = teams['away'].get("score")
                game_info["status"] = game["status"]["abstractGameState"]

                kept_games.append(game_info)

    return kept_games
def get_teams(pbp_json):
    """
    Pull the home and away teams out of the play by play json.

    :param pbp_json: raw play by play json

    :return: dict with the 'Home' and 'Away' teams
    """
    game_teams = pbp_json['gameData']['teams']

    home = shared.get_team(game_teams['home']['name'])
    away = shared.get_team(game_teams['away']['name'])

    return {'Home': home, 'Away': away}
def get_teams(pbp_json):
    """
    Pull the home and away teams out of the play by play json.

    The team names are upper-cased before being handed to shared.get_team.

    :param pbp_json: raw play by play json

    :return: dict with the "Home" and "Away" teams
    """
    game_teams = pbp_json["gameData"]["teams"]

    # Keys of the result are capitalized, keys of the json are lower case
    return {
        venue: shared.get_team(game_teams[venue.lower()]["name"].upper())
        for venue in ("Home", "Away")
    }
def get_players_json(game_json):
    """
    Return dict of players for that game, grouped by venue ("home" / "away").

    :param game_json: raw json for the game. The boxscore under 'liveData' supplies each
                      player's full name and id, the players section under 'gameData'
                      supplies the last name.

    :return: {"home": {name -> {"id", "last_name"}}, "away": {name -> {"id", "last_name"}}}
             where name is the full name after passing through shared.fix_name and
             last_name is upper-cased
    """
    players = {"home": {}, "away": {}}
    boxscore_teams = game_json['liveData']['boxscore']['teams']

    for venue, venue_players in players.items():
        for id_key, player in boxscore_teams[venue]['players'].items():
            person = player['person']
            player_name = shared.fix_name(person['fullName'])

            venue_players[player_name] = {
                "id": person['id'],
                # Both sections of the json are keyed by the same player key
                "last_name": game_json['gameData']['players'][id_key]['lastName'].upper(),
            }

    return players
def scrape_schedule(date_from, date_to, preseason=False, live=False):
    """
    Call get_schedule for the date range and scrape the raw schedule json it returns.

    :param date_from: scrape from this date
    :param date_to: scrape until this date
    :param preseason: Boolean indicating whether to include preseason games (default is False)
    :param live: Boolean indicating whether we are scraping live games. When True the
                 requirement that the game's detailed state is "Final" is relaxed.

    :return: list with one dict of info per game kept
    """
    kept_games = []
    raw_schedule = get_schedule(date_from, date_to)

    for day in raw_schedule["dates"]:
        for game in day["games"]:
            # Only finished games are kept unless we are scraping live
            is_final = game["status"]["detailedState"] == "Final"
            if not (is_final or live):
                continue

            # Everything after the first five characters of the game id
            season_game_id = int(str(game["gamePk"])[5:])

            # Ids of 40000 and up are never kept; ids under 20000 only when preseason is set
            if season_game_id >= 40000:
                continue
            if season_game_id < 20000 and not preseason:
                continue

            # The last character of the timestamp is dropped before parsing
            start_time = datetime.datetime.strptime(game["gameDate"][:-1], "%Y-%m-%dT%H:%M:%S")

            kept_games.append({
                "game_id": game["gamePk"],
                "date": day["date"],
                "start_time": start_time,
                "home_team": shared.get_team(game["teams"]["home"]["team"]["name"].upper()),
                "away_team": shared.get_team(game["teams"]["away"]["team"]["name"].upper()),
                "status": game["status"]["abstractGameState"],
            })

    return kept_games
def get_teams(response):
    """
    Extract the teams for each game from the doc.

    Every non-empty <td class="team"> cell is treated as one team and consecutive cells
    are paired up, two per game.

    :param response: doc

    :return: list of games, each a list of the (up to two) teams found for it
    """
    soup = BeautifulSoup(response, 'lxml')

    # Read the text of each cell once; empty cells are skipped
    cell_texts = (cell.get_text() for cell in soup.find_all('td', {'class': "team"}))
    teams = [shared.get_team(text.upper()) for text in cell_texts if text != '']

    # Make a list of both teams for each game
    return [teams[i:i + 2] for i in range(0, len(teams), 2)]
def analyze_shifts(shift, name, team, home_team, player_ids):
    """
    Analyze a single shift for a player.

    Prior to this each player (in a dictionary) has a list with each entry being a shift.

    :param shift: info on shift. Index 1 is the period, indices 2, 3 and 4 hold the start,
                  end and duration; only the part before the first "/" of each is used.
    :param name: player name
    :param team: given team
    :param home_team: home team for given game
    :param player_ids: dict with info on players, keyed by "Home"/"Away" and then by
                       upper-cased player name

    :return: dict with info for shift
    """
    has_digit = re.compile(r"\d+")  # Used to check if something contains a number
    player = name.upper()

    shifts = dict()
    shifts["Player"] = player
    shifts["Period"] = "4" if shift[1] == "OT" else shift[1]
    shifts["Team"] = shared.get_team(team.strip(" "))
    shifts["Start"] = shared.convert_to_seconds(shift[2].split("/")[0])
    shifts["Duration"] = shared.convert_to_seconds(shift[4].split("/")[0])

    # The end time can't be trusted when it holds no digits, so fall back to start + duration
    raw_end = shift[3].split("/")[0]
    if has_digit.search(raw_end):
        shifts["End"] = shared.convert_to_seconds(raw_end)
    else:
        shifts["End"] = shifts["Start"] + shifts["Duration"]

    # Players that can't be found get an empty id
    venue = "Home" if home_team == team else "Away"
    try:
        shifts["Player_Id"] = player_ids[venue][player]["id"]
    except KeyError:
        shifts["Player_Id"] = ""

    return shifts
def analyze_shifts(shift, name, team, home_team, player_ids):
    """
    Analyze a single shift for a player.

    Prior to this each player (in a dictionary) has a list with each entry being a shift.

    :param shift: info on shift. Index 1 is the period, indices 2, 3 and 4 hold the start,
                  end and duration; only the part before the first '/' of each is used.
    :param name: player name
    :param team: given team
    :param home_team: home team for given game
    :param player_ids: dict with info on players, keyed by 'Home'/'Away' and then by
                       upper-cased player name

    :return: dict with info for shift
    """
    contains_number = re.compile(r'\d+')  # Used to check if something contains a number
    upper_name = name.upper()

    shifts = dict()
    shifts['Player'] = upper_name
    shifts['Period'] = '4' if shift[1] == 'OT' else shift[1]
    shifts['Team'] = shared.get_team(team.strip(' '))
    shifts['Start'] = shared.convert_to_seconds(shift[2].split('/')[0])
    shifts['Duration'] = shared.convert_to_seconds(shift[4].split('/')[0])

    # The end time can't be trusted when it holds no digits, so fall back to start + duration
    end_text = shift[3].split('/')[0]
    if contains_number.search(end_text):
        shifts['End'] = shared.convert_to_seconds(end_text)
    else:
        shifts['End'] = shifts['Start'] + shifts['Duration']

    # Players that can't be found get an empty id
    side = 'Home' if home_team == team else 'Away'
    try:
        shifts['Player_Id'] = player_ids[side][upper_name]['id']
    except KeyError:
        shifts['Player_Id'] = ''

    return shifts