def getEWMGamePerf(df, dates, i, window, com=0.3, metric='PTS_G'):
    """Return the EWMA-smoothed values of `metric` over a window of games.

    The window is whatever `getDatesWindow(dates, i, window)` selects; the
    values for those dates are read from `df` with `getWindowVals` and then
    smoothed with `predictionMethods.applyEWMA` using `com` as its parameter.

    Returns the smoothed values as an array (one entry per window value).
    """
    # raw metric values for the dates inside the window
    rawVals = getWindowVals(df, getDatesWindow(dates, i, window), metric)

    # smooth the values and hand back the underlying array
    smoothed = predictionMethods.applyEWMA(pd.Series(rawVals), param=com)
    return smoothed.values
def updateLabel(label, dates, i, dfPlayer, labelsDict, metric='PTS_G', train=True):
    """Shift `label` by a baseline computed from the player's other games.

    Only the first entry of `labelsDict` is used; its key selects the baseline
    and its value carries the parameters:

    * ``'mean'`` -- mean of `metric` over the window (needs ``'window'``)
    * ``'ewm'``  -- last EWMA value over the window (needs ``'window'``, ``'com'``)
    * ``'raw'``  -- `metric` of the game at ``dates[i - 1]``
    * anything else -- no shift

    When `train` is true the baseline is subtracted from `label`; otherwise it
    is added to `label`.

    If there is nothing to compute the baseline from (empty window, or
    ``i == 0`` for ``'raw'``) the shift is 0, so the label is returned
    unchanged instead of becoming NaN or raising.

    Raises IndexError if `labelsDict` is empty.
    """
    # unpack label info (only the first entry is considered)
    labelType, labelParams = list(labelsDict.items())[0]

    # label = perf - mean(window)
    if labelType == 'mean':
        window = labelParams['window']

        datesWindow = getDatesWindow(dates, i, window)
        windowVals = getWindowVals(dfPlayer, datesWindow, metric)

        # np.mean of an empty window is NaN; use no shift instead
        labelShift = np.mean(windowVals) if np.size(windowVals) else 0

    # label = perf - ewm(window)[-1]
    elif labelType == 'ewm':
        window = labelParams['window']
        com = labelParams['com']

        datesWindow = getDatesWindow(dates, i, window)
        windowVals = getWindowVals(dfPlayer, datesWindow, metric)
        ewm_windowVals = predictionMethods.applyEWMA(pd.Series(windowVals), param=com).values

        # indexing [-1] on an empty result would raise IndexError
        labelShift = ewm_windowVals[-1] if ewm_windowVals.size else 0

    # label = perf - prev_performance
    elif labelType == 'raw':
        # at i == 0, dates[i - 1] would be dates[-1] (the last game), not a
        # previous one, so there is no baseline to use
        labelShift = getGamePerf(dfPlayer, dates[i - 1], metric) if i > 0 else 0

    # label = perf
    else:
        labelShift = 0

    return label - labelShift if train else label + labelShift
def topNTeammatesPerf(df, teammates, year, window, n=1, com=0.2, metric='PTS'):
    """Return the `n` largest teammate performances, largest first.

    For every player in `teammates`, the rows of `df` for that player are
    restricted to years strictly before `year`; the last `window` of those
    years are passed to `getWindowVals`, the values are smoothed with
    `predictionMethods.applyEWMA` (parameter `com`), and the final smoothed
    value is taken as that player's performance (0 if there are no values).
    """
    def _latestEwm(player):
        # rows belonging to this teammate
        playerRows = df.loc[df.Player == player]

        # years strictly before the target year, trimmed to the window
        priorYears = playerRows.loc[playerRows.Year < year, 'Year'].values
        recentYears = priorYears[-window:]

        # smoothed performance over those years
        vals = getWindowVals(playerRows, recentYears, metric)
        smoothed = predictionMethods.applyEWMA(pd.Series(vals), param=com).values

        if smoothed.size:
            return smoothed[-1]
        return 0

    scores = np.array([])
    for teammate in teammates:
        scores = np.append(scores, _latestEwm(teammate))

    return heapq.nlargest(n, scores)
def updateLabel(label, years, i, df, labelsDict, metric='PTS', train=True):
    """Shift `label` by a baseline computed over a window of years.

    Only the first entry of `labelsDict` is used; its key selects the baseline
    and its value carries the parameters:

    * ``'mean'`` -- mean of `metric` over the window (needs ``'window'``)
    * ``'ewm'``  -- last EWMA value over the window (needs ``'window'``, ``'com'``)
    * anything else -- no shift

    The window is whatever `getYearsWindow(years, i, window)` selects. When
    `train` is true the baseline is subtracted from `label`; otherwise it is
    added to `label`.

    If the window holds no values the shift is 0, so the label is returned
    unchanged instead of becoming NaN or raising.

    Raises IndexError if `labelsDict` is empty.
    """
    # unpack label info (only the first entry is considered)
    labelType, labelParams = list(labelsDict.items())[0]

    # label = perf - mean(window)
    if labelType == 'mean':
        window = labelParams['window']

        # get performance over relevant years
        yearsWindow = getYearsWindow(years, i, window)
        windowVals = getWindowVals(df, yearsWindow, metric)

        # np.mean of an empty window is NaN; use no shift instead
        labelShift = np.mean(windowVals) if np.size(windowVals) else 0

    # label = perf - ewm(window)[-1]
    elif labelType == 'ewm':
        window = labelParams['window']
        com = labelParams['com']

        # get performance over relevant years
        yearsWindow = getYearsWindow(years, i, window)
        windowVals = getWindowVals(df, yearsWindow, metric)

        # apply ewm and get most recent value
        ewm_windowVals = predictionMethods.applyEWMA(pd.Series(windowVals), param=com).values

        # indexing [-1] on an empty result would raise IndexError
        labelShift = ewm_windowVals[-1] if ewm_windowVals.size else 0

    # label = perf
    else:
        labelShift = 0

    return label - labelShift if train else label + labelShift
def getFeature(dates, i, dfPlayer, dfLeague, teamsDict, featuresDict, metric='PTS_G'):
    """Build the feature vector for the game at ``dates[i]``.

    `featuresDict` maps a feature name to its parameter dict. Features are
    appended in the dict's iteration order; unrecognised names are ignored.

    * ``'std'`` / ``'mean'`` -- std / mean of `metric` over the `window` games
      before index `i` (needs ``'window'``)
    * ``'ewm'`` -- last EWMA value of `metric` over that window
      (needs ``'window'``, ``'com'``)
    * ``'oppt'`` -- ratio of `getTeamOppPerf` for the opponent to
      `getLeagueOppPerf` on the game date
    * ``'teamLoc'`` -- `getTeamLoc` on the game date
    * ``'gmOutcome'`` -- last EWMA value of `getGameOutcomes` over the window
      (needs ``'window'``, ``'com'``)

    Empty windows produce 0 for the window-based features, and ``'oppt'``
    falls back to 1 when the league value is 0.

    `dfLeague` is accepted but not used here.

    Returns ``(featureVec, num_hsvt_features)``; the counter is incremented
    once for each of ``'std'``, ``'mean'``, ``'ewm'`` and ``'oppt'``.
    """
    def _windowDates(window):
        # clamp the start at 0: a negative start index would be interpreted
        # relative to the end of `dates` and select the wrong (usually empty)
        # slice when fewer than `window` games precede index i
        return dates[max(i - window, 0):i]

    # initialize
    featureVec = np.array([])
    num_hsvt_features = 0

    for feature, featureParams in featuresDict.items():
        # how much this feature contributes to num_hsvt_features
        hsvtCount = 0

        # compute standard deviation (std) of window values
        if feature == 'std':
            windowVals = getWindowVals(dfPlayer, _windowDates(featureParams['window']), metric)
            featureVal = np.std(windowVals) if np.size(windowVals) else 0
            hsvtCount = 1

        # compute mean of window values
        elif feature == 'mean':
            windowVals = getWindowVals(dfPlayer, _windowDates(featureParams['window']), metric)
            featureVal = np.mean(windowVals) if np.size(windowVals) else 0
            hsvtCount = 1

        # get performance of most recent game after applying ewm on window values
        elif feature == 'ewm':
            window = featureParams['window']
            com = featureParams['com']

            windowVals = getWindowVals(dfPlayer, _windowDates(window), metric)
            ewm_windowVals = predictionMethods.applyEWMA(pd.Series(windowVals), param=com).values

            # [-1] on an empty result would raise IndexError
            featureVal = ewm_windowVals[-1] if ewm_windowVals.size else 0
            hsvtCount = 1

        # get opponent information
        elif feature == 'oppt':
            # get opponent on game date
            currDate = dates[i]
            oppTeam = dfPlayer.loc[dfPlayer.gmDate == currDate, 'opptAbbr'].values[0]

            # performance allowed by oppTeam relative to the league
            oppTeamOppPerf = getTeamOppPerf(teamsDict, oppTeam, currDate)
            leagueOppPerf = getLeagueOppPerf(teamsDict, currDate)

            # avoid dividing by zero; treat the opponent as league-average
            featureVal = oppTeamOppPerf / leagueOppPerf if leagueOppPerf else 1
            hsvtCount = 1

        # get team location on game date
        elif feature == 'teamLoc':
            featureVal = getTeamLoc(dfPlayer, dates[i])

        # get outcome of most recent game after applying ewm on window values
        elif feature == 'gmOutcome':
            window = featureParams['window']
            com = featureParams['com']

            gmOutcomes = getGameOutcomes(dfPlayer, _windowDates(window))
            gmOutcomes = predictionMethods.applyEWMA(pd.Series(gmOutcomes), param=com).values

            # [-1] on an empty result would raise IndexError
            featureVal = gmOutcomes[-1] if gmOutcomes.size else 0

        # NOTE: a 'teammates' feature is not implemented in this version
        else:
            continue

        # append to feature vector
        featureVec = np.append(featureVec, featureVal)
        num_hsvt_features += hsvtCount

    return featureVec, num_hsvt_features
def stats(player, dfLeague, teamsDict, window, oppWindow, com):
    """Build a per-game statistics table for `player`.

    The player's rows are taken from `dfLeague` and every game from the third
    one onward (index 2) contributes one row. The returned DataFrame has the
    columns:

    * ``gmDate`` -- game date
    * ``PTS`` -- `getGamePerf` for the game
    * ``Delta`` -- this game's performance minus the previous game's
    * ``mean(X(t-1))`` -- mean of `getWindowVals` over the dates window (0 if empty)
    * ``Prev`` -- previous game's performance
    * ``std`` -- std of `getWindowVals` over the dates window
    * ``Prev-Mean`` -- previous performance minus mean of `getSeasonPerf`
      evaluated at the date two games back
    * ``Prev-Prev`` -- previous performance minus the one before it
    * ``PrevGmRslt`` -- last EWMA value of `getGameOutcomes` over the window
    * ``teamLoc`` -- `getTeamLoc` for the game
    * ``Y(t-1)``, ``Y(t-1)-mean(Y(t-2))``, ``mean(Y(t-1))`` -- the three
      values returned by `teammateEffect`

    `teamsDict` and `oppWindow` are accepted but not used here.
    """
    playerGames = dfLeague[dfLeague.Player == player]
    gameDates = playerGames.gmDate.values

    # one growing array per output column, in output order
    columnNames = (
        'PTS', 'Delta', 'mean(X(t-1))', 'Prev', 'std', 'Prev-Mean',
        'Prev-Prev', 'PrevGmRslt', 'teamLoc', 'Y(t-1)',
        'Y(t-1)-mean(Y(t-2))', 'mean(Y(t-1))',
    )
    columns = {name: np.array([]) for name in columnNames}

    for idx in range(2, len(gameDates)):
        today = gameDates[idx]
        lastDate = gameDates[idx - 1]
        twoBackDate = gameDates[idx - 2]
        recentDates = getDatesWindow(gameDates, idx, window)

        # performance values
        perfToday = getGamePerf(playerGames, today)
        perfLast = getGamePerf(playerGames, lastDate)
        perfChange = perfToday - perfLast
        seasonMean = np.mean(getSeasonPerf(playerGames, twoBackDate))
        perfTwoBack = getGamePerf(playerGames, twoBackDate)

        # smoothed game outcome, most recent value
        outcomes = getGameOutcomes(playerGames, recentDates)
        outcomes = predictionMethods.applyEWMA(pd.Series(outcomes), param=com).values
        lastOutcome = outcomes[-1]

        # spread and mean over the window
        recentVals = getWindowVals(playerGames, recentDates)
        spread = np.std(recentVals)
        if recentVals.size:
            recentMean = np.mean(recentVals)
        else:
            recentMean = 0

        # team location
        venue = getTeamLoc(playerGames, today)

        # teammate effect
        team = getTeam(playerGames, today)
        mates = getTeammates(dfLeague, player, team, today)
        y, y_mean, mean = teammateEffect(dfLeague, mates, today, window, 1, com)

        # extend every column with this game's values
        gameRow = {
            'PTS': perfToday,
            'Delta': perfChange,
            'mean(X(t-1))': recentMean,
            'Prev': perfLast,
            'std': spread,
            'Prev-Mean': perfLast - seasonMean,
            'Prev-Prev': perfLast - perfTwoBack,
            'PrevGmRslt': lastOutcome,
            'teamLoc': venue,
            'Y(t-1)': y,
            'Y(t-1)-mean(Y(t-2))': y_mean,
            'mean(Y(t-1))': mean,
        }
        for name, value in gameRow.items():
            columns[name] = np.append(columns[name], value)

    return pd.DataFrame({'gmDate': gameDates[2:], **columns})
def getFeature(years, i, dfPlayer, dfLeague, featuresDict, metric='PTS'):
    """Build the feature vector for the season at ``years[i]``.

    `featuresDict` maps a feature name to its parameter dict. Features are
    appended in the dict's iteration order; unrecognised names are ignored.

    * ``'std'`` / ``'mean'`` -- std / mean of `metric` over the years selected
      by `getYearsWindow(years, i, window)` (needs ``'window'``)
    * ``'ewm'`` -- last EWMA value of `metric` over that window
      (needs ``'window'``, ``'com'``)
    * ``'teammates'`` -- the values returned by `topNTeammatesPerf` for the
      player's teammates in ``years[i]`` (needs ``'window'``, ``'com'``, ``'n'``)

    An empty window yields 0 for ``'std'``, ``'mean'`` and ``'ewm'`` rather
    than NaN or an IndexError.

    Returns the feature vector as a 1-D numpy array.
    """
    featureVec = np.array([])

    for feature, featureParams in featuresDict.items():
        if feature in ('std', 'mean', 'ewm'):
            # unpack parameters
            window = featureParams['window']

            # get performance over relevant years
            yearsWindow = getYearsWindow(years, i, window)
            windowVals = getWindowVals(dfPlayer, yearsWindow, metric)

            if feature == 'std':
                featureVal = np.std(windowVals) if np.size(windowVals) else 0
            elif feature == 'mean':
                featureVal = np.mean(windowVals) if np.size(windowVals) else 0
            else:
                # apply ewm and get most recent value
                com = featureParams['com']
                ewm_windowVals = predictionMethods.applyEWMA(pd.Series(windowVals), param=com).values

                # [-1] on an empty result would raise IndexError
                featureVal = ewm_windowVals[-1] if ewm_windowVals.size else 0

        elif feature == 'teammates':
            # unpack parameters
            window = featureParams['window']
            com = featureParams['com']
            n = featureParams['n']

            # get current year and player
            currYear = years[i]
            player = dfPlayer.Player.values[0]

            # get current team and teammates
            team = getTeam(dfPlayer, currYear)
            teammates = getTeammates(dfLeague, player, team, currYear)
            featureVal = topNTeammatesPerf(dfLeague, teammates, currYear, window, n, com, metric)

        else:
            continue

        # append to feature vector
        featureVec = np.append(featureVec, featureVal)

    return featureVec
def getFeature(dates, i, dfPlayer, dfLeague, teamsDict, featuresDict, metric='PTS_G'):
    """Build the feature vector for the game at ``dates[i]``.

    `featuresDict` maps a feature name to its parameter dict. Features are
    appended in the dict's iteration order; unrecognised names are ignored.

    * ``'std'`` / ``'mean'`` -- std / mean of `metric` over the dates selected
      by `getDatesWindow(dates, i, window)`; 0 if the window is empty
    * ``'ewm'`` -- last value of `getEWMGamePerf` for `metric`; 0 if empty
    * ``'oppt'`` -- `getTeamOppPerf` for the opponent divided by
      `getLeagueOppPerf`; 1 if the league value is 0
    * ``'gmOutcome'`` -- last EWMA value of `getGameOutcomes`; 0 if empty
    * ``'teammates'`` -- the values returned by `topNTeammatesPerf`
    * ``'delta'`` -- `metric` of the previous game minus that of the game
      before it; 0 when fewer than two games precede index `i`
    * ``'teamLoc'`` -- `getTeamLoc` on the game date

    Returns ``(featureVec, num_hsvt_features)``. The counter grows by 1 for
    every recognised feature except ``'teamLoc'`` (0) and ``'teammates'``
    (the configured ``'n'``).
    """
    # initialize
    featureVec = np.array([])
    num_hsvt_features = 0

    for feature, featureParams in featuresDict.items():
        # how much this feature contributes to num_hsvt_features
        hsvtCount = 1

        # compute standard deviation (std) of window values
        if feature == 'std':
            window = featureParams['window']

            datesWindow = getDatesWindow(dates, i, window)
            windowVals = getWindowVals(dfPlayer, datesWindow, metric)
            featureVal = np.std(windowVals) if windowVals.size else 0

        # compute mean of window values
        elif feature == 'mean':
            window = featureParams['window']

            datesWindow = getDatesWindow(dates, i, window)
            windowVals = getWindowVals(dfPlayer, datesWindow, metric)
            featureVal = np.mean(windowVals) if windowVals.size else 0

        # get performance of most recent game after applying ewm on window values
        elif feature == 'ewm':
            window = featureParams['window']
            com = featureParams['com']

            # forward the caller's metric instead of a hard-coded 'PTS_G'
            ewm_windowVals = getEWMGamePerf(dfPlayer, dates, i, window, com=com, metric=metric)

            # [-1] on an empty result would raise IndexError
            featureVal = ewm_windowVals[-1] if ewm_windowVals.size else 0

        # get opponent information
        elif feature == 'oppt':
            window = featureParams['window']

            # get opponent on game date
            currDate = dates[i]
            oppTeam = getOppTeam(dfPlayer, currDate)

            # get average performance of opposing teams allowed by oppTeam & league
            oppTeamOppPerf = getTeamOppPerf(teamsDict, oppTeam, currDate, window)
            leagueOppPerf = getLeagueOppPerf(teamsDict, currDate, window)

            # if leagueOppPerf = 0 (no games played yet) then set featureVal = 1
            featureVal = oppTeamOppPerf / leagueOppPerf if leagueOppPerf else 1

        # get outcome of most recent game after applying ewm on window values
        elif feature == 'gmOutcome':
            window = featureParams['window']
            com = featureParams['com']

            # compute outcomes of games during dates window & apply ewma
            datesWindow = getDatesWindow(dates, i, window)
            gmOutcomes = getGameOutcomes(dfPlayer, datesWindow)
            gmOutcomes = predictionMethods.applyEWMA(pd.Series(gmOutcomes), param=com).values

            # [-1] on an empty result would raise IndexError
            featureVal = gmOutcomes[-1] if gmOutcomes.size else 0

        # get teammate information
        elif feature == 'teammates':
            window = featureParams['window']
            com = featureParams['com']
            n = featureParams['n']

            # get current date and player
            currDate = dates[i]
            player = dfPlayer.Player.values[0]

            # get current team and teammates
            team = getTeam(dfPlayer, currDate)
            teammates = getTeammates(dfLeague, player, team, currDate)
            featureVal = topNTeammatesPerf(dfLeague, teammates, currDate, window, n, com, metric)
            hsvtCount = n

        # get delta performance between the two games preceding the current one
        elif feature == 'delta':
            if i >= 2:
                prevGmPerf = getGamePerf(dfPlayer, dates[i - 1], metric)
                prev_prevGmPerf = getGamePerf(dfPlayer, dates[i - 2], metric)
                featureVal = prevGmPerf - prev_prevGmPerf
            else:
                # dates[i - 1] / dates[i - 2] would wrap around to the end of
                # `dates`; with fewer than two earlier games there is no delta
                featureVal = 0

        # get team location on game date
        elif feature == 'teamLoc':
            featureVal = getTeamLoc(dfPlayer, dates[i])
            hsvtCount = 0

        else:
            continue

        # append to feature vector
        featureVec = np.append(featureVec, featureVal)
        num_hsvt_features += hsvtCount

    return featureVec, num_hsvt_features