Пример #1
0
def _parseOvertimeCount(statusText):
    """Return the overtime count encoded in a game-status string.

    A status without "OT" gives 0. Otherwise the status is split on "/" into
    exactly two parts and the second part is inspected: "OT" gives 1, anything
    else is read from its first character (e.g. "2OT" -> 2).

    Malformed statuses raise (ValueError from the unpack or int(), IndexError
    from an empty second part); the exception is left to the caller.
    """
    if "OT" not in statusText:
        return 0
    [_, otLabel] = statusText.split("/")
    if otLabel == "OT":
        return 1
    return int(otLabel[0])


def _teamPlayerIds(boxScoreTree, wrapClass):
    """Return (starterIds, benchIds) for the box-score column with class *wrapClass*.

    Player ids are the part of each player link after "_/id/". The first five
    ids on the page are treated as starters, the rest as bench players.
    """
    playerURLs = boxScoreTree.xpath("//div[@class='" + wrapClass + "']/div[@class='sub-module']/div/table/tbody/tr/td/a/@href")
    playerIds = [url.split("_/id/")[1] for url in playerURLs]
    return playerIds[0:5], playerIds[5:]


def _playerBoxScoreLine(boxScoreTree, playerid):
    """Return [position] + the text of every cell in the player's box-score row.

    The row is located by the player's profile link. Index 1 of the result is
    the first cell text after the position, which callers test for "DNP".
    """
    rowXPath = "//tr[td/a/@href='http://espn.go.com/nba/player/_/id/" + playerid + "']"
    # second text node under the name cell is the position (first is the name)
    position = boxScoreTree.xpath(rowXPath + "/td/*/text()")[1]
    return [position] + boxScoreTree.xpath(rowXPath + "/*/text()")


def createPlayerMap(gameids,currentMap):
    """Scrape ESPN box scores for the given games and merge them into currentMap.

    For every game (processed in ascending order of element [1] of each entry)
    two pages are fetched: the box score and the game page. Each player that
    counts as having played gets a stat row stored under
    playerid -> gameid; every other listed player is recorded as injured.

    A stat row is laid out as:
        Util.data_date_convert(date) + [own team, opposing team]
        + [home flag (0 away / 1 home), starter flag (1 starter / 0 bench),
           own score, opposing score, overtime count]
        + Util.playerStatsConvert(box-score line)

    Args:
        gameids: iterable of indexable entries; [0] is the ESPN game id
            (a string, appended to the URLs) and [1] is the sort key.
        currentMap: mapping playerid -> mapping gameid -> stat row. It is
            updated in place with the newly scraped rows.

    Returns:
        (currentMap, injuredIDMap) where injuredIDMap is the map loaded via
        ReadWriteFiles.readInjuredIDMap() extended with
        gameid -> (awayInjuredIds, homeInjuredIds) for each parsed game.

    Raises:
        Anything other than IndexError raised while parsing a game (e.g.
        KeyError for a team name missing from Util.team_dict) propagates.
        IndexError is caught per game and reported as a missing game.
    """
    #use defaultdict to map playerids to game stats
    playerMap = defaultdict(OrderedDict)

    # Sort once and reuse the result: sorting twice would leave the loop with
    # nothing to iterate when gameids is a one-shot iterable.
    orderedGames = sorted(gameids,key=lambda x: x[1])
    print(orderedGames)

    #will keep track of playerids of injured players on both teams
    injuredIDMap = ReadWriteFiles.readInjuredIDMap()

    awayHeader = "//div[@class='team away']/div[@class='content']"
    homeHeader = "//div[@class='team home']/div[@class='content']"
    nameTail = "/div[@class='team-container']/div[@class='team-info']/a/span[@class='short-name']/text()"
    scoreTail = "/div[@class='score-container']/div/text()"

    for gameEntry in orderedGames:
        gameid = gameEntry[0]

        gameBoxScoreURL = "http://espn.go.com/nba/boxscore?gameId=" + gameid
        boxScorePage = requests.get(gameBoxScoreURL)
        boxScoreTree = html.fromstring(boxScorePage.content)

        gameInfoURL = "http://espn.go.com/nba/game?gameId=" + gameid
        gameInfoPage = requests.get(gameInfoURL)
        gameInfoTree = html.fromstring(gameInfoPage.content)

        try:
            #first get game data not specific to each player (time,date,score,team numbers etc)
            game_time_info = gameInfoTree.xpath("//div[@class='game-date-time']/span/@data-date")[0]
            OTfinalStatus = gameInfoTree.xpath("//div[@class='game-status']/span/text()")[0]
            overtime = _parseOvertimeCount(OTfinalStatus)

            awayName = boxScoreTree.xpath(awayHeader + nameTail)[0]
            homeName = boxScoreTree.xpath(homeHeader + nameTail)[0]

            awayScore = boxScoreTree.xpath(awayHeader + scoreTail)[0]
            homeScore = boxScoreTree.xpath(homeHeader + scoreTail)[0]

            #keep track of which players were on the away team and which were on the home team
            awayTeamNum = Util.team_dict[awayName]
            awayStarteridList, awayBenchidList = _teamPlayerIds(boxScoreTree, "col column-one gamepackage-away-wrap")

            homeTeamNum = Util.team_dict[homeName]
            homeStarteridList, homeBenchidList = _teamPlayerIds(boxScoreTree, "col column-two gamepackage-home-wrap")

            awayInjuredIDList = []
            homeInjuredIDList = []

            # One row per group of players, processed in this order:
            # (ids, own team, opposing team, home flag, starter flag,
            #  own score, opposing score, injured list, extra length clause)
            # NOTE(review): in the original code only the away-starter loop
            # also accepted a row whose line length is not 2; that difference
            # is preserved here as-is. Confirm whether it should apply to
            # every group or to none.
            playerGroups = (
                (awayStarteridList, awayTeamNum, homeTeamNum, 0, 1, awayScore, homeScore, awayInjuredIDList, True),
                (awayBenchidList, awayTeamNum, homeTeamNum, 0, 0, awayScore, homeScore, awayInjuredIDList, False),
                (homeStarteridList, homeTeamNum, awayTeamNum, 1, 1, homeScore, awayScore, homeInjuredIDList, False),
                (homeBenchidList, homeTeamNum, awayTeamNum, 1, 0, homeScore, awayScore, homeInjuredIDList, False),
            )

            for (groupIds, ownTeamNum, otherTeamNum, homeFlag, starterFlag,
                 ownScore, otherScore, injuredList, extraLengthClause) in playerGroups:
                if not groupIds:
                    continue

                # Game-level prefix is the same for every player of the group,
                # so build it once (assumes Util.data_date_convert is a pure
                # conversion - TODO confirm).
                gameStatsList = Util.data_date_convert(game_time_info) + [ownTeamNum, otherTeamNum] + [homeFlag, starterFlag, int(ownScore), int(otherScore), overtime]

                for playerid in groupIds:
                    playerStatsList = _playerBoxScoreLine(boxScoreTree, playerid)

                    # A player counts unless the first stat cell says DNP for
                    # a reason other than the coach's decision.
                    statusCell = playerStatsList[1]
                    countsAsPlayed = "DNP" not in statusCell or "COACH'S DECISION" in statusCell
                    if extraLengthClause and not countsAsPlayed:
                        countsAsPlayed = len(playerStatsList) != 2

                    if countsAsPlayed:
                        # list + list builds a fresh row, so rows never share state
                        playerMap[playerid][gameid] = gameStatsList + Util.playerStatsConvert(playerStatsList)
                    else:
                        injuredList.append(playerid)
                        print(playerid + " is injured, so stats from game will not count")

            injuredIDMap[gameid] = (awayInjuredIDList,homeInjuredIDList)

        except (IndexError):
            # NOTE(review): an IndexError raised part-way through a game leaves
            # the rows already stored in playerMap in place and adds no
            # injuredIDMap entry for that game.
            print("Game " + gameid + " does not exist")
    print(playerMap)

    #need to UNION currentMap(defaultdict in file) and playerMap(recent games)
    for playerid, scrapedGames in playerMap.items():
        for gameid, statList in scrapedGames.items():
            if playerid not in currentMap:
                currentMap[playerid] = OrderedDict()
            currentMap[playerid][gameid] = statList

    return (currentMap,injuredIDMap)
Пример #2
0
def _scheduleTableXPath(position):
    """Return the XPath of the schedule table inside the position-th container div."""
    return "//div[@class='basketball']/div[@id='sched-container']/div[position()=" + str(position) + "]/table"


def _scheduleDate(scheduleTree, position):
    """Return the date of the schedule table at *position*, parsed by Util.strToDate."""
    caption = scheduleTree.xpath(_scheduleTableXPath(position) + "/caption/text()")[0]
    return Util.strToDate(caption)


def _scheduleGameIds(scheduleTree, position):
    """Return the game ids linked from the third column of the schedule table at *position*."""
    gameLinks = scheduleTree.xpath(_scheduleTableXPath(position) + "/tbody/tr/td[position()=3]/a/@href")
    # the id is whatever follows the first "=" in the link
    return [link.split("=")[1] for link in gameLinks]


def _teamInfoXPath(side, tail):
    """Return the XPath below the team-info link of the 'away' or 'home' competitor."""
    return "//div[@class='competitors']/div[@class='team " + side + "']/div[@class='content']/div[@class='team-container']/div[@class='team-info']/a/" + tail


def _rosterPlayerIds(gameTree, side):
    """Fetch the roster page of one competitor and return its player ids.

    The roster URL is derived from the team link on the game page by inserting
    "roster/" in front of the first "_" of the path.

    Raises:
        IndexError: if the team link or a player id cannot be found.
        ValueError: if the team link contains no "_" at all.
    """
    teamURL = gameTree.xpath(_teamInfoXPath(side, "@href"))[0]
    # Split on the first "_" only, so a team link with additional underscores
    # no longer fails the two-value unpack.
    [pathHead, pathTail] = teamURL.split("_", 1)
    rosterPage = requests.get("http://espn.go.com" + pathHead + "roster/_" + pathTail)
    rosterTree = html.fromstring(rosterPage.content)
    rowClasses = rosterTree.xpath("//tr[contains(@class,'player-46')]/@class")
    return [rowClass.split("player-46-")[1] for rowClass in rowClasses]


def create_todays_playerMap():
    """Build a map of every rostered player taking part in today's games.

    The ESPN schedule page is read first. If its second container is not dated
    today, the third container is tried; if that is not today either, a message
    is printed and the process exits. For each of today's games the game page
    and both team roster pages are fetched.

    Returns:
        defaultdict(OrderedDict) mapping playerid -> gameid ->
        [m, d, y, t, own team number, opposing team number, home flag],
        where m, d, y, t are the four values returned by
        Util.data_date_convert (presumably month, day, year, time - confirm
        against Util) and the home flag is 0 for away players, 1 for home.

    Raises:
        SystemExit: when neither schedule table is dated today.
        IndexError, KeyError, ValueError: when a page does not have the
            expected structure or a team name is missing from Util.team_dict.
    """
    # Local import keeps the block self-contained. sys.exit() replaces the
    # site-module exit() helper, which is meant for interactive use and is not
    # guaranteed to be defined; the exit status is unchanged.
    import sys

    today_playerMap = defaultdict(OrderedDict)

    schedulePage = requests.get("http://espn.go.com/nba/schedule")
    scheduleTree = html.fromstring(schedulePage.content)

    if _scheduleDate(scheduleTree, 2) == datetime.date.today():
        todaysGameIDs = _scheduleGameIds(scheduleTree, 2)
    else:
        print("ESPN's schedule page has not updated for today's games")
        date = _scheduleDate(scheduleTree, 3)
        todaysGameIDs = _scheduleGameIds(scheduleTree, 3)
        if date != datetime.date.today():
            print("Something is wrong with ESPN's schedule page")
            sys.exit()

    # TODO: the original author left an unexplained "FIX!!!" marker here.

    for gameid in todaysGameIDs:
        gamePageURL = "http://espn.go.com/nba/conversation?gameId=" + gameid
        gamePage = requests.get(gamePageURL)
        gameTree = html.fromstring(gamePage.content)

        #first get game data not specific to each player (time,date,team numbers etc)
        game_time_info = gameTree.xpath("//div[@class='game-status']/span/@data-date")[0]
        [m,d,y,t] = Util.data_date_convert(game_time_info)

        awayTeamName = gameTree.xpath(_teamInfoXPath("away", "span[@class='short-name']/text()"))[0]
        homeTeamName = gameTree.xpath(_teamInfoXPath("home", "span[@class='short-name']/text()"))[0]

        # away roster first, then home; last element is the home flag
        sides = (
            ("away", awayTeamName, homeTeamName, 0),
            ("home", homeTeamName, awayTeamName, 1),
        )
        for side, ownTeamName, otherTeamName, homeFlag in sides:
            for playerid in _rosterPlayerIds(gameTree, side):
                today_playerMap[playerid][gameid] = [m,d,y,t,Util.team_dict[ownTeamName],Util.team_dict[otherTeamName],homeFlag]

    return today_playerMap