def point_counter(start, end):
    """Write Joe Thornton's per-game points for season 2010 to a CSV file.

    Regular-season games numbered ``start`` .. ``end - 1`` are inspected.
    For every game in which San Jose took part and jersey number 19 appears
    among San Jose's players in the event summary, one row
    ``[points, opponent, 'Home' | 'Away']`` is written to
    ``joe_thornton_points_summary.csv``.

    :param start: first game number to inspect (inclusive)
    :param end: game number to stop at (exclusive)
    """
    season = 2010
    game_type = GameType.Regular
    san_jose = 'SAN JOSE SHARKS'
    # Joe Thornton's jersey number; the player dictionaries are keyed by it.
    thornton_number = 19

    # The context manager closes the file even when a scrape raises part-way
    # through the season (the manual open()/close() pair leaked the handle).
    # NOTE(review): on Python 3 the csv docs recommend open(..., newline='').
    with open('joe_thornton_points_summary.csv', 'w') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(['Points', 'Vs_Team', 'Home/Away'])

        for game_num in range(start, end):
            game_key = GameKey(season, game_type, game_num)
            game = Game(game_key)
            event_summary = EventSummary(game_key)

            matchup = game.matchup
            if matchup is None:
                continue

            # San Jose as the home team: points come from the home roster and
            # the opponent is the away side.
            if matchup['home'] == san_jose:
                home_players = event_summary.home_players
                if thornton_number in home_players:
                    points = home_players[thornton_number]['p']
                    csv_writer.writerow([points, matchup['away'], 'Home'])

            # San Jose as the away team: mirror image of the branch above.
            if matchup['away'] == san_jose:
                away_players = event_summary.away_players
                if thornton_number in away_players:
                    points = away_players[thornton_number]['p']
                    csv_writer.writerow([points, matchup['home'], 'Away'])
def RetrieveSingleGameData(self, season, game, game_type=2):
    """
    Retrieve Data from Specified Game and queue both teams' player data.

    TODO: Support pulling data from playoffs and preseason

    :param season: Season the game took place in
    :param game: number of the game to retrieve
    :param game_type: Type of game that we are attempting to grab,
        1 = Preseason, 2 = Regular Season, 3 = Playoff, DEFAULT 2
    :return: False when the game has no matchup (nothing is stored),
        True once the game info is inserted and both rosters are queued
    """
    gk = GameKey(season, game_type, game)
    g = Game(gk)
    # Called for its loading side effect only; the return value was never used.
    g.load_all()

    if g.matchup is None:
        print("Unable to get matchup")
        return False

    game_id = self.insertGameInfo(season, game, g.matchup)

    # Home side first, then away, each queued under its own team name.
    self.addGameDataToQueueWithTeam(
        g.matchup['home'], g.event_summary.home_players, game_id)
    self.addGameDataToQueueWithTeam(
        g.matchup['away'], g.event_summary.away_players, game_id)
    return True
def test_game(self):
    """Check the cumulative stats of 2014 regular-season game 1226 (OTT vs PIT).

    Builds a ``Game`` with Score/Shots/EvenShots/Corsi/Fenwick accumulators
    and compares each total against fixed expected values. Any exception
    raised while loading is reported as a test failure.
    """
    from nhlscrapi.games.game import Game, GameKey, GameType
    from nhlscrapi.games.cumstats import Score, ShotCt, EvenStShotCt, Corsi, Fenwick

    try:
        season = 2014                       # 2013-2014 season
        game_num = 1226
        game_type = GameType.Regular        # regular season game
        game_key = GameKey(season, game_type, game_num)

        # define stat types that will be counted as the plays are parsed
        cum_stats = {
            'Score': Score(),
            'Shots': ShotCt(),
            'EvenShots': EvenStShotCt(),
            'Corsi': Corsi(),
            'Fenwick': Fenwick(),
        }
        game = Game(game_key, cum_stats=cum_stats)

        # will call all the http reqs (bc lazy)
        fin_score = game.cum_stats['Score']
    except Exception as e:
        # self.fail states the intent directly instead of asserting 0 == 1.
        self.fail('Loading error: {0}'.format(e))

    # final score test
    self.assertEqual(fin_score.total, {'OTT': 3, 'PIT': 2},
                     'Incorrect final score: {}'.format(fin_score.total))

    # shootout goal tally test
    test_val = fin_score.shootout.total['OTT']
    self.assertEqual(test_val, 2,
                     'Incorrect OTT shootout goal count: {}'.format(test_val))

    # shot count test
    test_val = game.cum_stats['Shots'].total
    self.assertEqual(test_val, {'PIT': 28, 'OTT': 33},
                     'Invalid shot count: {}'.format(test_val))

    # even strength shot count test
    test_val = game.cum_stats['EvenShots'].total
    self.assertEqual(test_val, {'PIT': 22, 'OTT': 18},
                     'Invalid even strength shot count: {}'.format(test_val))

    # even strength shot attempt (corsi) test
    test_val = game.cum_stats['Corsi'].total
    self.assertEqual(test_val, {'PIT': 36, 'OTT': 39},
                     'Invalid (Corsi) shot attempt count: {}'.format(test_val))

    # even strength, close, shot attempts ex blocks/misses (Fenwick) test
    test_val = game.cum_stats['Fenwick'].total
    self.assertEqual(test_val, {'PIT': 30, 'OTT': 29},
                     'Invalid (Fenwick) shot attempt count: {}'.format(test_val))
def build_rosters(session):
    """Build rosters for the regular-season games of seasons 2008 through 2017.

    Within each season, game numbers are probed upwards from 1; the first
    game whose ``away_coach`` is ``None`` ends that season's scan. Every
    game before that point is announced on stdout and handed to
    ``_build_roster``.

    :param session: passed through to ``_build_roster`` and committed here
    """
    for season in range(2008, 2018):
        number = 1
        while True:
            key = GameKey(season, GameType.Regular, number)
            if Game(key).away_coach is None:
                break
            print('Working game {0}'.format(key.to_tuple()))
            _build_roster(session, key)
            number += 1
        # NOTE(review): the source's indentation was lost; committing once
        # per season is assumed here -- confirm against the original layout.
        session.commit()
if not 1 <= game_num <= C.GAME_CT_DICT[season]: print 'Invalide game number: %i' % game_num sys.exit(0) print season, game_num, reg_season gt = GameType.Regular if reg_season else GameType.Playoffs gk = GameKey(season, gt, game_num) cum_stats = { 'Score': Score(), 'Shots': ShotCt(), 'ShotAtt': ShotAttemptCt(), 'Corsi': Corsi(), 'Fenwick': Fenwick() } game = Game(gk, cum_stats=cum_stats) out_f = ''.join(str(x) for x in gk.to_tuple()) + '.json' with open(out_f, 'w') as f: # auto computes when using game wrapper # print 'Final :', game.cum_stats['Score'].total # print 'Shootout :', game.cum_stats['Score'].shootout.total # print 'Shots :', game.cum_stats['Shots'].total # print 'Shot Attempts :', game.cum_stats['ShotAtt'].total # print 'EV Shot Atts :', game.cum_stats['Corsi'].total # print 'Corsi :', game.cum_stats['Corsi'].share() # print 'FW Shot Atts :', game.cum_stats['Fenwick'].total # print 'Fenwick :', game.cum_stats['Fenwick'].share() # print '\nRefs :', game.refs
from nhlscrapi.games.game import Game, GameKey, GameType
from nhlscrapi.games.cumstats import Score, ShotCt, Corsi, Fenwick
import io
import json

# Print the cumulative stats of regular-season games 10, 11 and 12.
# Every print goes through the print() call form with a single pre-formatted
# string, so the script parses on Python 3 and emits the same text on
# Python 2 (the original mixed print statements with print() calls).
season = 2011                       # 2010-2011 season
game_type = GameType.Regular        # regular season game

for i in range(10, 13):
    game_num = i
    try:
        game_key = GameKey(season, game_type, game_num)
        # Two spaces on purpose: matches what `print "Game Number: ", i` wrote.
        print('Game Number:  {}'.format(i))

        # define stat types that will be counted as the plays are parsed
        cum_stats = {
            'Score': Score(),
            'Shots': ShotCt(),
            'Corsi': Corsi(),
            'Fenwick': Fenwick(),
        }
        game = Game(game_key, cum_stats=cum_stats)

        print('Final         : {}'.format(game.cum_stats['Score'].total))
        print('Shootout      : {}'.format(
            game.cum_stats['Score'].shootout.total))
        print('Shots         : {}'.format(game.cum_stats['Shots'].total))
        print('EV Shot Atts  : {}'.format(game.cum_stats['Corsi'].total))
        print('Corsi         : {}'.format(game.cum_stats['Corsi'].share()))
        print('FW Shot Atts  : {}'.format(game.cum_stats['Fenwick'].total))
        print('Fenwick       : {}'.format(game.cum_stats['Fenwick'].share()))

        game.load_all()
    except KeyError:
        print("Game Doesn't exist")
def point_counter(start, end, team_name, season_year, player_num, file_name):
    """Create a csv of a player's game log from the nhlscrapi.

    start - enter the number of the game you want to start getting data from
    end - enter the number of the game you want to end on (exclusive)
        The start/end numbers correspond to specific game numbers. So for
        example if you want to get data for the whole season then enter 1
        for start and 1231 for end. (1230 games played in 30 team NHL season)
    team_name - Name of the player's team given as a string in all capitals
        and spaces included
    season_year - Enter the season year as a number using the year that
        season ended on. For example, for the 2009-2010 season you would
        enter 2010.
    player_num - Enter the number of the player
    file_name - Enter a string of what you want the output file name to be.
        For example, 'red.csv'

    One row ``[points, opponent, 'Home' | 'Away']`` is written for every
    regular-season game in range in which ``team_name`` took part and
    ``player_num`` appears among that team's players in the event summary.
    """
    game_type = GameType.Regular

    # The context manager closes the file even when a scrape raises part-way
    # through (the manual open()/close() pair leaked the handle).
    # NOTE(review): on Python 3 the csv docs recommend open(..., newline='').
    with open(file_name, 'w') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(['Points', 'Vs_Team', 'Home/Away'])

        for game_num in range(start, end):
            game_key = GameKey(season_year, game_type, game_num)
            game = Game(game_key)
            event_summary = EventSummary(game_key)

            matchup = game.matchup
            if matchup is None:
                continue

            # Supplied team is at home: read the home player dictionary and
            # report the away side as the opponent.
            if matchup['home'] == team_name:
                home_players = event_summary.home_players
                if player_num in home_players:
                    points = home_players[player_num]['p']
                    csv_writer.writerow([points, matchup['away'], 'Home'])

            # Supplied team is away: mirror image of the branch above.
            if matchup['away'] == team_name:
                away_players = event_summary.away_players
                if player_num in away_players:
                    points = away_players[player_num]['p']
                    csv_writer.writerow([points, matchup['home'], 'Away'])
def build_games(session):
    """Load per-season game tables from hockey-reference.com into the session.

    For every year from 2002 through the current year the season page is read
    from ``raw_data/hr_year_<year>.html`` when that file exists; otherwise it
    is downloaded and cached there. Each row of the ``games`` table (after
    the first) becomes a ``Game`` that is merged into ``session``. Years
    whose download raises ``HTTPError`` or whose page has no ``games`` table
    are skipped.

    :param session: object providing ``merge`` and ``flush``
    """
    # Column layout is the same for every season, so build it once.
    data_stats = [
        'game_id', 'home_goals', 'attendance', 'home_team_name',
        'away_team_name', 'away_goals', 'game_remarks', 'overtimes',
        'game_duration'
    ]
    data_types = dict(
        zip(data_stats, [str, int, int, str, str, int, str, str, time]))

    for year in range(2002, datetime.now().year + 1):
        year_games_fnm = os.path.join('raw_data',
                                      'hr_year_{0}.html'.format(year))
        if not os.path.isfile(year_games_fnm):
            url = 'http://www.hockey-reference.com/leagues/NHL_{0}_games.html'.format(
                year)
            logging.debug('Reading HTML from link: {0}'.format(url))
            try:
                user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) ' \
                             'Gecko/2009021910 Firefox/3.0.7'
                headers = {
                    'User-Agent': user_agent,
                }
                request = urllib.request.Request(
                    url, None, headers)  # The assembled request
                with urllib.request.urlopen(request) as response:
                    # response.read() returns bytes; str() on it would store
                    # the "b'...'" repr with escaped newlines, not the page.
                    html = response.read().decode('utf-8', errors='replace')
            except urllib.request.HTTPError:
                continue
            with open(year_games_fnm, 'w', encoding='utf-8') as f:
                f.write(html)
        else:
            with open(year_games_fnm, 'r', encoding='utf-8',
                      errors='replace') as f:
                html = f.read()

        soup = BeautifulSoup(html)
        t = soup.find('table', {'id': 'games'})
        if t is None:
            # No games table on this page: nothing to import for this year.
            continue
        rows = t.findAll('tr')

        data = []
        for r in rows[1:]:
            record = dict.fromkeys(data_stats)
            data.append(record)
            h = r.find('th')
            record['game_id'] = h.attrs['csk']
            for ds in data_stats:
                kind = data_types[ds]
                for td in r.findAll('td', {'data-stat': ds}):
                    if kind is str:
                        record[ds] = td.text
                    elif kind is int:
                        # Stored as a digit string with thousands separators
                        # removed (not converted to int here); empty -> None.
                        record[ds] = td.text.replace(',', '') if td.text else None
                    elif kind is time:
                        if td.text.strip():
                            record[ds] = time(
                                *[int(d) for d in td.text.split(':')])
                        else:
                            record[ds] = None

        for dat in data:
            session.merge(Game(**dat))
        # NOTE(review): the source's indentation was lost; flushing once per
        # season is assumed here -- confirm against the original layout.
        session.flush()
import nhlscrapi
from nhlscrapi.games.game import GameType, GameKey, Game
from random import seed
from random import sample, randint

# Print the matchup and a random period for 60 randomly chosen games.

# seed random number generator
seed(1)
# prepare a sequence of game numbers
# Game numbers are 1-based; range(1271) offered the non-existent game 0 and
# could never pick game 1271.
# NOTE(review): 1271 is taken as the last regular-season game of the
# 2018-2019 season (31 teams * 82 games / 2) -- confirm.
sequence = list(range(1, 1272))
# select a subset without replacement
subset = sample(sequence, 60)
print(subset)

season = 2019                       # 2018-2019 season
game_type = GameType.Regular        # regular season game

for i in subset:
    game_num = i
    game_key = GameKey(season, game_type, game_num)
    game = Game(game_key)
    period = randint(1, 3)
    # game.load_all()
    print(game.matchup)
    print(period)
def mine_data(self):
    """
    main mining routine in mining class - fetches data using nhlscrapi

    For every season in ``self._seasonList`` and every game type in
    ``self._gameTypeList``, games are fetched with increasing game number
    until ``away_coach`` comes back as ``""`` (or raises ``IndexError``) or
    ``self._GAMENUMAX`` games have been tried. For each game the home/away
    statistics are combined twice -- home + away (``plusMinus == -1``) and
    home - away (``plusMinus == 1``) -- stored on ``self`` and handed to
    ``self.write_data``.
    """
    for season in self._seasonList:
        self._actualSeason = season
        # for every season we set the gameID to zero
        self._gameId = 0
        # create files for added and delta values
        self.write_header(plusMinus=1)
        self.write_header(plusMinus=-1)
        VerbosityF(0, "Mine for data in season ", season)

        for self._actualGameType in self._gameTypeList:
            VerbosityF(1, "Fetching data for game type ", self._actualGameType)
            gameNum = 0
            coachName = "noCoach"
            # The bound must be the integer itself: comparing an int with
            # range(...) raises TypeError on Python 3 and is always true on
            # Python 2, so the limit never applied.
            while gameNum < self._GAMENUMAX and not coachName == "":
                gameNum = gameNum + 1
                game_key = GameKey(self._actualSeason, self._actualGameType, gameNum)
                gameObj = Game(game_key)
                try:
                    # Reuse gameObj rather than building a second Game for
                    # the same key.
                    coachName = gameObj.away_coach
                except IndexError:
                    coachName = ""
                if coachName == "":
                    # No such game: this game type is exhausted.
                    break

                VerbosityF(1, "Fetching data for game ", gameNum)
                do_print = 1
                try:
                    gameObj.load_all()
                    dateOfMatch = gameObj.matchup["date"]
                    gameDateReformatted = self.getGameDate(dateOfMatch)
                    referenceDate = self.getReferenceDate(season)
                    self._relativeDate = (gameDateReformatted - referenceDate).days

                    sumDict = gameObj.event_summary
                    sumgameDict = sumDict.totals()
                    takeAwaysHome = sumgameDict["home"]["tk"]
                    takeAwaysAway = sumgameDict["away"]["tk"]
                    giveAwaysHome = sumgameDict["home"]["gv"]
                    giveAwaysAway = sumgameDict["away"]["gv"]
                    pimHome = sumgameDict["home"]["pim"]
                    pimAway = sumgameDict["away"]["pim"]
                    hitHome = sumgameDict["home"]["ht"]
                    hitAway = sumgameDict["away"]["ht"]

                    # faceoff statistics
                    afo = sumDict.away_fo
                    hfo = sumDict.home_fo
                    hfov = hfo["ev"]["won"]
                    afov = afo["ev"]["won"]

                    score_hometeam = int(gameObj.matchup["final"]["home"])
                    score_awayteam = int(gameObj.matchup["final"]["away"])
                    ppgoalsHomeTeam = sumDict.home_shots['agg']['pp']['g']
                    ppgoalsAwayTeam = sumDict.away_shots['agg']['pp']['g']
                    shotsHome = int(sumgameDict["home"]["s"])
                    shotsAway = int(sumgameDict["away"]["s"])

                    self._homeTeam = gameObj.matchup["home"]
                    self._awayTeam = gameObj.matchup["away"]
                    self._attendance = gameObj.matchup["attendance"]
                except Exception:
                    # Best effort: a game that cannot be read is not printed.
                    # (A bare except would also swallow KeyboardInterrupt.)
                    do_print = 0

                # we print two files - one with summed, one with delta values
                for plusMinus in [1, -1]:
                    if do_print:
                        # Only combine values when extraction succeeded;
                        # otherwise the locals are undefined (first game) or
                        # left over from the previous game.
                        if isinstance(hfov, int):
                            self._diff_fo = int(hfov) - plusMinus * int(afov)
                        self._deltaTA = takeAwaysHome - plusMinus * takeAwaysAway
                        self._deltaGA = giveAwaysHome - plusMinus * giveAwaysAway
                        self._deltaPM = pimHome - plusMinus * pimAway
                        self._deltaHT = hitHome - plusMinus * hitAway
                        self._score_diff = score_hometeam - plusMinus * score_awayteam
                        self._delta_ppgoals = ppgoalsHomeTeam - plusMinus * ppgoalsAwayTeam
                        self._shots_diff = shotsHome - plusMinus * shotsAway
                    if plusMinus == 1:
                        if do_print:
                            self._homeHasWon = 1 if self._score_diff > 0 else 0
                        # NOTE(review): the source's indentation was lost;
                        # one id per game (incremented on the delta pass
                        # only) is assumed -- confirm.
                        self._gameId = self._gameId + 1
                    # now print this dataset
                    self.write_data(do_print, plusMinus)