Пример #1
0
 def __init__(self, ids, teamname="Virginia Tech", season="2013"):
     """Collect the ESPN game ids on a team's schedule page and load each game.

     Args:
         ids: Mapping from team name to an indexable pair of strings; item 0
             and item 1 are concatenated into the schedule URL (before
             "/year/" and after the season, respectively).
         teamname: Key looked up in ``ids`` and forwarded to every
             ``gpbp.Game``. Defaults to the previously hard-coded team.
         season: Season to request; converted with ``str`` for the URL and
             forwarded unchanged to ``gpbp.Game``. Defaults to the previously
             hard-coded season.

     Sets ``self.teamname``, ``self.gameidlist`` (id strings in page order)
     and ``self.gameinseason`` (one ``gpbp.Game`` per collected id).
     """
     self.gameidlist = []
     self.gameinseason = []
     self.teamname = teamname
     url = (
         "http://espn.go.com/college-football/team/schedule/_/id/"
         + ids[self.teamname][0]
         + "/year/"
         + str(season)
         + "/"
         + ids[self.teamname][1]
     )
     soup = muf.cleanhtml(url)

     # Game links look like one of:
     #   http://espn.go.com/ncf/recap?id=292620259
     #   http://espn.go.com/ncf/boxscore?id=262990259
     id_prefixes = ("/ncf/recap?id=", "/ncf/boxscore?id=")
     for link in soup.find_all("a"):
         # Anchors without an href yield None; treat them as non-matching
         # instead of failing on the slice.
         href = link.get("href") or ""
         for prefix in id_prefixes:
             if href.startswith(prefix):
                 self.gameidlist.append(href[len(prefix):])

     # Open and parse every game that was found on the schedule.
     for game_id in self.gameidlist:
         self.gameinseason.append(gpbp.Game(game_id, self.teamname, season))
Пример #2
0
    @staticmethod
    def _merge_split_names(tokens):
        """Re-join name tokens that whitespace splitting broke apart.

        While more than four tokens remain, the first adjacent pair of
        non-numeric tokens (positions 0/1, then 2/3) is merged into a single
        space-separated token. Stops as soon as no such pair exists, so
        unexpected input cannot loop forever. Returns a new list.
        """
        merged = list(tokens)
        while len(merged) > 4:
            if not merged[0].isdigit() and not merged[1].isdigit():
                merged[0:2] = [merged[0] + " " + merged[1]]
            elif not merged[2].isdigit() and not merged[3].isdigit():
                # Replace by position: removing by value could drop an equal
                # token that appears earlier in the list.
                merged[2:4] = [merged[2] + " " + merged[3]]
            else:
                break
        return merged

    def _record_abbreviations(self, first, second):
        """Decide which scoreboard token names this team and which the opponent.

        ``first`` and ``second`` are the tokens at positions 0 and 2 of the
        merged result list. The chosen tokens are appended to
        ``self.tmnmlist`` and ``self.oppnmlist``; if no rule applies a message
        is printed and neither list changes.

        NOTE(review): all four ``muf.is_abbrev`` checks are evaluated up
        front, which assumes it is a side-effect-free predicate - confirm.
        """
        second_is_team = muf.is_abbrev(second, self.teamname)
        second_is_opp = muf.is_abbrev(second, self.opponent)
        first_is_team = muf.is_abbrev(first, self.teamname)
        first_is_opp = muf.is_abbrev(first, self.opponent)

        team_second = second_is_team and first_is_opp
        team_first = second_is_opp and first_is_team

        if team_second and team_first:
            # Both readings fit; a home game prefers the second slot for this
            # team, anything else prefers the first slot.
            team_is_second = bool(self.home)
        elif team_second:
            team_is_second = True
        elif team_first:
            team_is_second = False
        elif second_is_team and not second_is_opp:
            team_is_second = True
        elif second_is_opp and not second_is_team:
            team_is_second = False
        elif first_is_team and not first_is_opp:
            team_is_second = False
        elif first_is_opp and not first_is_team:
            team_is_second = True
        else:
            print('could not make sense of abbrevs')
            return

        if team_is_second:
            self.tmnmlist.append(second)
            self.oppnmlist.append(first)
        else:
            self.tmnmlist.append(first)
            self.oppnmlist.append(second)

    def __init__(self, gameid, teamname, season):
        """Fetch one ESPN play-by-play page and parse it into play records.

        Args:
            gameid: Game id string appended to the play-by-play URL.
            teamname: Team of interest. A side of the page title is taken to
                be this team when every whitespace-separated word of
                ``teamname`` occurs in it.
            season: Stored on ``self.season`` and handed to the play parsers
                through the game-state dict.

        Attributes set:
            teamname / opponent: Names taken from the page title ("" when the
                title does not match ``teamname``).
            home: False when this team is the first name in the title, True
                when it is the second, None when neither matched.
            game_title: Always "".
            game_result: Space-separated text of the team-name and final-score
                cells, with a trailing space.
            tmnmlist / oppnmlist: Full name plus, when resolvable, the
                scoreboard token for this team / the opponent.
            pbps: Raw text of every ``<li>`` in the play-by-play container.
            plays: ``getplayinfo()`` results for rushing and passing plays.
            ply: The ``plys.Rushing`` objects created while parsing.
        """
        # Earlier URL formats, kept for reference:
        #   http://espn.go.com/ncf/playbyplay?gameId=292620259&period=0
        #   http://espn.go.com/college-football/playbyplay?gameId=242410259
        url = 'http://espn.go.com/ncf/playbyplay?gameId=' + gameid
        soup = muf.cleanhtml(url)
        self.teamname = ""
        self.opponent = ""
        self.season = season
        # The ESPN page title typically includes both full team names and the
        # game date, e.g.
        #   <title>Western Michigan Broncos vs. Virginia Tech Hokies - Play By Play - September 27, 2014 - ESPN</title>
        self.game_title = ""
        self.home = None
        for title in soup.findAll("title"):
            titletxt = title.get_text()
            team1 = titletxt[:titletxt.find(' v')]
            team2 = titletxt[titletxt.find('.')+2:titletxt.find(' -')]
            if all(word in team1 for word in teamname.split()):
                self.teamname = team1
                self.opponent = team2
                self.home = False
            elif all(word in team2 for word in teamname.split()):
                self.teamname = team2
                self.opponent = team1
                self.home = True

        self.game_result = ""
        for game in soup.findAll("div", {"class": "competitors"}):
            for team in game.findAll("tr"):
                for teams in team.findAll("td", {"class": "team-name"}):
                    self.game_result += teams.get_text() + " "
                for scores in team.find_all("td", {"class": "final-score"}):
                    self.game_result += scores.get_text() + " "

        # Names and abbreviations that are likely to be used for each side.
        self.tmnmlist = [self.teamname]
        self.oppnmlist = [self.opponent]
        gameresultlist = self._merge_split_names(self.game_result.split())
        if len(gameresultlist) > 2:
            self._record_abbreviations(gameresultlist[0], gameresultlist[2])
        else:
            # Scoreboard block missing or too short to hold two names.
            print('could not make sense of abbrevs')

        # pbps holds every raw string from ESPN's play-by-play list, which
        # includes quarter and drive markers in addition to the actual plays.
        self.pbps = []
        # plays stores the parsed play data, including the game state at the
        # time of the play.
        self.plays = []
        self.ply = []

        # Local game-state trackers (quarter, drive, possession).
        gameperiod = 1  # 1,2,3,4,5,6,7,8... with 5 being the first OT period
        drive = 1
        possesion = self.teamname
        gamestate = {
            'quarter': gameperiod,
            'driveno': drive,
            'teamname': self.tmnmlist,
            'opponent': self.oppnmlist,
            'season': self.season,
            'home': self.home,
            'offense': possesion,
        }

        # Entries that still need to be dealt with.
        todo_words = ('coin toss', 'toss', 'kick', 'attempt failed', 'extra point', 'Extra Point', 'Extra point', 'Conversion', 'conversion', 'punt', 'FG', 'field goal', 'Field Goal', 'Penalty', 'PENALTY', 'penalty', 'Fumble')
        # Entries there are no plans for yet.
        ignored_words = ('DRIVE', 'ball on', 'End')
        # Timeouts contain game clock info.
        timeout_words = ('Timeout', 'timeout')
        rushing_words = ('rush ', 'run ', 'Run ', 'sacked ', 'scramble ')
        passing_words = ('pass ', 'incompletion ', 'interception ', 'Interception ')
        safety_words = ('SAFETY', 'Safety', 'safety')
        # These might be empty plays caused by inconsistencies and errors in
        # ESPN's play by play; may need extra checks.
        filler_words = ('at', 'and')

        # Use the last matching container; a page without one has no plays.
        game_pbp = None
        for gamestring in soup.findAll("div", {"id": "gamepackage-play-by-play"}):
            game_pbp = gamestring
        play_items = game_pbp.findAll("li") if game_pbp is not None else []

        for play in play_items:
            print("play: ", play)
            text = play.get_text(separator=u'<>')
            self.pbps.append(text)
            if 'Quarter' in text:
                # Only re-read the period when the current number is absent;
                # the last digit in the text wins.
                if str(gameperiod) not in text:
                    for char in text:
                        if char.isdigit():
                            gameperiod = int(char)
                print(text, " ", gameperiod)
            elif 'OT ' in text:
                gameperiod = self.isDigitOT(text)
                print(text, " ", gameperiod)
            elif any(word in text for word in todo_words):
                pass
            elif any(word in text for word in ignored_words):
                pass
            elif any(word in text for word in timeout_words):
                pass
            elif any(word in text for word in rushing_words):
                currentplay = plys.Rushing(text, gamestate)
                self.plays.append(currentplay.getplayinfo())
                self.ply.append(currentplay)
            elif any(word in text for word in passing_words):
                # NOTE(review): unlike rushing plays, passing plays are not
                # added to self.ply - confirm whether that is intended.
                currentplay = plys.Passing(text, gamestate)
                self.plays.append(currentplay.getplayinfo())
            elif any(word in text for word in safety_words):
                pass
            elif ':' in text:
                # Contains the team currently with the ball (and likely the
                # best game clock info). Collapse the tokens to one leading
                # name: join non-numeric neighbours, stop at 'at', and drop
                # any token next to a number.
                # NOTE(review): this leaves a one-element list, whereas the
                # initial value is a plain string - confirm what the play
                # parsers expect in gamestate['offense'].
                possesion = text.split()
                while len(possesion) > 1:
                    if not possesion[0][0].isdigit() and not possesion[1][0].isdigit():
                        if possesion[1] == 'at':
                            del possesion[1:]
                        else:
                            possesion[0:2] = [possesion[0] + " " + possesion[1]]
                    else:
                        del possesion[1]
            elif any(word in text for word in filler_words):
                pass
            else:
                print("these are unhandled plays: ", text)
            gamestate['quarter'] = gameperiod
            gamestate['driveno'] = drive
            gamestate['offense'] = possesion