示例#1
0
def getEWMGamePerf(df, dates, i, window, com=0.3, metric='PTS_G'):
    """Return the EWMA-smoothed values of `metric` over a window of games.

    The dates come from ``getDatesWindow(dates, i, window)``, the raw values
    from ``getWindowVals(df, <those dates>, metric)``, and the smoothing is
    delegated to ``predictionMethods.applyEWMA`` with ``param=com``.

    Returns:
        The ``.values`` attribute of whatever ``applyEWMA`` returns.
    """
    rawPerf = pd.Series(
        getWindowVals(df, getDatesWindow(dates, i, window), metric))
    smoothed = predictionMethods.applyEWMA(rawPerf, param=com)
    return smoothed.values
示例#2
0
def updateLabel(label,
                dates,
                i,
                dfPlayer,
                labelsDict,
                metric='PTS_G',
                train=True):
    """Shift `label` by a baseline derived from the player's earlier games.

    Args:
        label: value to shift.
        dates: indexable sequence of game dates.
        i: index into `dates` of the game the label belongs to.
        dfPlayer: player data handed to the lookup helpers.
        labelsDict: mapping ``{labelType: params}``; only its first entry is
            used.  'mean' needs ``params['window']``, 'ewm' needs
            ``params['window']`` and ``params['com']``, 'raw' needs nothing.
            Any other label type results in a zero shift.
        metric: name of the metric passed to the lookup helpers.
        train: when true the baseline is subtracted from `label`, otherwise
            it is added back.

    Returns:
        ``label - baseline`` if `train`, else ``label + baseline``.
    """
    # only the first (labelType, params) pair is considered
    kind, params = list(labelsDict.items())[0]

    if kind == 'mean':
        # baseline = plain average over the window
        span = params['window']
        history = getWindowVals(dfPlayer, getDatesWindow(dates, i, span),
                                metric)
        baseline = np.mean(history)
    elif kind == 'ewm':
        # baseline = last value of the exponentially weighted window
        span, decay = params['window'], params['com']
        history = getWindowVals(dfPlayer, getDatesWindow(dates, i, span),
                                metric)
        smoothed = predictionMethods.applyEWMA(pd.Series(history),
                                               param=decay)
        baseline = smoothed.values[-1]
    elif kind == 'raw':
        # baseline = performance at dates[i - 1]
        baseline = getGamePerf(dfPlayer, dates[i - 1], metric)
    else:
        # unknown label type: leave the label as is
        baseline = 0

    if train:
        return label - baseline
    return label + baseline
示例#3
0
def topNTeammatesPerf(df, teammates, year, window, n=1, com=0.2, metric='PTS'):
    """Return the `n` largest EWMA performances among `teammates`.

    For every teammate, the rows of `df` with a matching ``Player`` are
    selected, the ``Year`` values strictly below `year` are collected, the
    last `window` of them are passed to ``getWindowVals`` and the result is
    smoothed with ``predictionMethods.applyEWMA(..., param=com)``.  The last
    smoothed value is that teammate's score; a teammate with no smoothed
    values scores 0.

    Returns:
        ``heapq.nlargest(n, scores)`` - a list of at most `n` scores in
        descending order (empty when `teammates` is empty).
    """

    def latestEwm(name):
        # rows of this teammate and the seasons before `year`
        rows = df.loc[df.Player == name]
        priorYears = rows.loc[rows.Year < year, 'Year'].values

        # smooth the metric over the trailing seasons
        history = getWindowVals(rows, priorYears[-window:], metric)
        smoothed = predictionMethods.applyEWMA(pd.Series(history),
                                               param=com).values
        return smoothed[-1] if smoothed.size else 0

    scores = np.array([])
    for name in teammates:
        scores = np.append(scores, latestEwm(name))
    return heapq.nlargest(n, scores)
示例#4
0
def updateLabel(label, years, i, df, labelsDict, metric='PTS', train=True):
    """Shift `label` by a baseline computed over a window of earlier years.

    Args:
        label: value to shift.
        years: sequence of years handed to ``getYearsWindow``.
        i: index into `years` of the season the label belongs to.
        df: player data handed to ``getWindowVals``.
        labelsDict: mapping ``{labelType: params}``; only its first entry is
            used.  'mean' needs ``params['window']``; 'ewm' needs
            ``params['window']`` and ``params['com']``.  Any other label type
            results in a zero shift.
        metric: name of the metric passed to ``getWindowVals``.
        train: when true the baseline is subtracted from `label`, otherwise
            it is added back.

    Returns:
        ``label - baseline`` if `train`, else ``label + baseline``.
    """
    # only the first (labelType, params) pair is considered
    kind, params = list(labelsDict.items())[0]

    if kind == 'mean':
        # baseline = plain average over the window
        span = params['window']
        history = getWindowVals(df, getYearsWindow(years, i, span), metric)
        baseline = np.mean(history)
    elif kind == 'ewm':
        # baseline = last value of the exponentially weighted window
        span, decay = params['window'], params['com']
        history = getWindowVals(df, getYearsWindow(years, i, span), metric)
        smoothed = predictionMethods.applyEWMA(pd.Series(history),
                                               param=decay)
        baseline = smoothed.values[-1]
    else:
        # unknown label type: leave the label as is
        baseline = 0

    if train:
        return label - baseline
    return label + baseline
示例#5
0
def getFeature(dates,
               i,
               dfPlayer,
               dfLeague,
               teamsDict,
               featuresDict,
               metric='PTS_G'):
    """Build the feature vector for the game at ``dates[i]``.

    Args:
        dates: sliceable sequence of the player's game dates.
        i: index into `dates` of the game being described.
        dfPlayer: player data handed to the lookup helpers; the 'oppt'
            feature reads its ``gmDate`` and ``opptAbbr`` columns directly.
        dfLeague: accepted for interface compatibility; not used here.
        teamsDict: team data handed to ``getTeamOppPerf`` / ``getLeagueOppPerf``.
        featuresDict: mapping ``{featureName: params}``.  Recognised names:
            'std', 'mean' (``params['window']``), 'ewm' and 'gmOutcome'
            (``params['window']``, ``params['com']``), 'oppt' and 'teamLoc'
            (no parameters read).  Unrecognised names are ignored.
        metric: name of the metric passed to ``getWindowVals``.

    Returns:
        ``(featureVec, num_hsvt_features)``: a 1-D numpy array with one entry
        appended per recognised feature (in `featuresDict` order), and the
        number of 'std', 'mean', 'ewm' and 'oppt' features among them
        ('teamLoc' and 'gmOutcome' are not counted).

    Notes:
        Windows are the up-to-`window` dates immediately before index `i`.
        When that window yields no values, 'std', 'mean' and 'ewm' fall back
        to 0; 'oppt' falls back to 1 when the league value is 0.
    """
    # initialize
    featureVec = np.array([])
    num_hsvt_features = 0

    def trailingDates(window):
        # Clamp the start at 0: with i < window a bare ``i - window`` is
        # negative, which slices from the END of `dates` and typically
        # yields an empty window instead of the available history.
        return dates[max(i - window, 0):i]

    for feature, featureParams in featuresDict.items():
        # standard deviation of window values
        if feature == 'std':
            windowVals = getWindowVals(
                dfPlayer, trailingDates(featureParams['window']), metric)
            featureVal = np.std(windowVals) if np.size(windowVals) else 0

            featureVec = np.append(featureVec, featureVal)
            num_hsvt_features += 1

        # mean of window values
        elif feature == 'mean':
            windowVals = getWindowVals(
                dfPlayer, trailingDates(featureParams['window']), metric)
            featureVal = np.mean(windowVals) if np.size(windowVals) else 0

            featureVec = np.append(featureVec, featureVal)
            num_hsvt_features += 1

        # most recent value after applying ewm on window values
        elif feature == 'ewm':
            window = featureParams['window']
            com = featureParams['com']

            windowVals = getWindowVals(dfPlayer, trailingDates(window), metric)
            ewm_windowVals = predictionMethods.applyEWMA(pd.Series(windowVals),
                                                         param=com).values
            # nothing to index when the window produced no values
            featureVal = ewm_windowVals[-1] if np.size(ewm_windowVals) else 0

            featureVec = np.append(featureVec, featureVal)
            num_hsvt_features += 1

        # opponent information
        elif feature == 'oppt':
            # opponent on game date
            currDate = dates[i]
            oppTeam = dfPlayer.loc[dfPlayer.gmDate == currDate,
                                   'opptAbbr'].values[0]

            # ratio of the opponent's value to the league-wide value
            oppTeamOppPerf = getTeamOppPerf(teamsDict, oppTeam, currDate)
            leagueOppPerf = getLeagueOppPerf(teamsDict, currDate)
            # a zero league value would make the ratio undefined; use 1
            featureVal = oppTeamOppPerf / leagueOppPerf if leagueOppPerf else 1

            featureVec = np.append(featureVec, featureVal)
            num_hsvt_features += 1

        # team location on game date
        elif feature == 'teamLoc':
            featureVal = getTeamLoc(dfPlayer, dates[i])
            featureVec = np.append(featureVec, featureVal)

        # most recent game outcome after applying ewm on window outcomes
        elif feature == 'gmOutcome':
            window = featureParams['window']
            com = featureParams['com']

            gmOutcomes = getGameOutcomes(dfPlayer, trailingDates(window))
            gmOutcomes = predictionMethods.applyEWMA(pd.Series(gmOutcomes),
                                                     param=com).values
            featureVal = gmOutcomes[-1]

            featureVec = np.append(featureVec, featureVal)

    return featureVec, num_hsvt_features
示例#6
0
def stats(player, dfLeague, teamsDict, window, oppWindow, com):
    """Assemble a per-game table of performance statistics for `player`.

    The player's rows are selected from `dfLeague` by its ``Player`` column
    and the games are taken from ``gmDate``.  One output row is produced for
    every game from the third one on (index 2 onwards), since each row looks
    back at the two preceding games.

    `teamsDict` and `oppWindow` are accepted but not used by this function.

    Returns:
        A DataFrame with columns 'gmDate', 'PTS', 'Delta', 'mean(X(t-1))',
        'Prev', 'std', 'Prev-Mean', 'Prev-Prev', 'PrevGmRslt', 'teamLoc',
        'Y(t-1)', 'Y(t-1)-mean(Y(t-2))' and 'mean(Y(t-1))'.
    """
    dfPlayer = dfLeague[dfLeague.Player == player]
    dates = dfPlayer.gmDate.values

    # one growing array per computed column, kept in output column order
    computed = ('PTS', 'Delta', 'mean(X(t-1))', 'Prev', 'std', 'Prev-Mean',
                'Prev-Prev', 'PrevGmRslt', 'teamLoc', 'Y(t-1)',
                'Y(t-1)-mean(Y(t-2))', 'mean(Y(t-1))')
    table = {'gmDate': dates[2:]}
    for name in computed:
        table[name] = np.array([])

    for i in range(2, len(dates)):
        today, lastGame, twoBack = dates[i], dates[i - 1], dates[i - 2]
        recentDates = getDatesWindow(dates, i, window)

        # performance of this game and the previous one
        ptsNow = getGamePerf(dfPlayer, today)
        ptsLast = getGamePerf(dfPlayer, lastGame)
        change = ptsNow - ptsLast

        # reference values as of two games back
        seasonAvg = np.mean(getSeasonPerf(dfPlayer, twoBack))
        ptsTwoBack = getGamePerf(dfPlayer, twoBack)

        # last value of the smoothed game outcomes over the window
        outcomes = getGameOutcomes(dfPlayer, recentDates)
        outcomes = predictionMethods.applyEWMA(pd.Series(outcomes),
                                               param=com).values
        lastOutcome = outcomes[-1]

        # spread and average of the window values
        recentPts = getWindowVals(dfPlayer, recentDates)
        spread = np.std(recentPts)
        average = np.mean(recentPts) if recentPts.size else 0

        # team location
        venue = getTeamLoc(dfPlayer, today)

        # teammate effect
        club = getTeam(dfPlayer, today)
        roster = getTeammates(dfLeague, player, club, today)
        y, y_mean, mean = teammateEffect(dfLeague, roster, today, window, 1,
                                         com)

        # (the opponent effect is currently not computed)

        row = {
            'PTS': ptsNow,
            'Delta': change,
            'mean(X(t-1))': average,
            'Prev': ptsLast,
            'std': spread,
            'Prev-Mean': ptsLast - seasonAvg,
            'Prev-Prev': ptsLast - ptsTwoBack,
            'PrevGmRslt': lastOutcome,
            'teamLoc': venue,
            'Y(t-1)': y,
            'Y(t-1)-mean(Y(t-2))': y_mean,
            'mean(Y(t-1))': mean,
        }
        for name in computed:
            table[name] = np.append(table[name], row[name])

    return pd.DataFrame(table)
示例#7
0
def getFeature(years, i, dfPlayer, dfLeague, featuresDict, metric='PTS'):
    """Build the feature vector for the season at ``years[i]``.

    Args:
        years: sequence of years handed to ``getYearsWindow``.
        i: index into `years` of the season being described.
        dfPlayer: player data; the 'teammates' feature reads its ``Player``
            column directly.
        dfLeague: league data, used only by the 'teammates' feature.
        featuresDict: mapping ``{featureName: params}``.  Recognised names:
            'std', 'mean' (``params['window']``), 'ewm' (``params['window']``,
            ``params['com']``) and 'teammates' (``params['window']``,
            ``params['com']``, ``params['n']``).  Other names are ignored.
        metric: name of the metric passed to the lookup helpers.

    Returns:
        A 1-D numpy array with the values of the recognised features, in
        `featuresDict` order.
    """

    def pastPerf(span):
        # metric values over the window of years selected by getYearsWindow
        return getWindowVals(dfPlayer, getYearsWindow(years, i, span), metric)

    pieces = np.array([])
    for name, params in featuresDict.items():
        if name == 'std':
            value = np.std(pastPerf(params['window']))
        elif name == 'mean':
            value = np.mean(pastPerf(params['window']))
        elif name == 'ewm':
            span, decay = params['window'], params['com']
            smoothed = predictionMethods.applyEWMA(pd.Series(pastPerf(span)),
                                                   param=decay)
            # keep only the last smoothed value
            value = smoothed.values[-1]
        elif name == 'teammates':
            span, decay, topN = params['window'], params['com'], params['n']

            # current season, player, team and teammates
            season = years[i]
            playerName = dfPlayer.Player.values[0]
            club = getTeam(dfPlayer, season)
            roster = getTeammates(dfLeague, playerName, club, season)
            value = topNTeammatesPerf(dfLeague, roster, season, span, topN,
                                      decay, metric)
        else:
            # unrecognised feature: contributes nothing
            continue

        pieces = np.append(pieces, value)
    return pieces
示例#8
0
def getFeature(dates, i, dfPlayer, dfLeague, teamsDict, featuresDict, metric='PTS_G'):
    """Build the feature vector for the game at ``dates[i]``.

    Args:
        dates: indexable sequence of the player's game dates.
        i: index into `dates` of the game being described.
        dfPlayer: player data handed to the lookup helpers; the 'teammates'
            feature reads its ``Player`` column directly.
        dfLeague: league data, used only by the 'teammates' feature.
        teamsDict: team data handed to ``getTeamOppPerf`` / ``getLeagueOppPerf``.
        featuresDict: mapping ``{featureName: params}``.  Recognised names:
            'std', 'mean', 'oppt' (``params['window']``); 'ewm', 'gmOutcome'
            (``params['window']``, ``params['com']``); 'teammates'
            (``params['window']``, ``params['com']``, ``params['n']``);
            'delta' and 'teamLoc' (no parameters read).  Other names are
            ignored.
        metric: name of the metric; passed to every performance lookup,
            including the 'ewm' and 'delta' features.

    Returns:
        ``(featureVec, num_hsvt_features)``: a 1-D numpy array with the values
        of the recognised features (in `featuresDict` order), and a counter
        that grows by 1 for each of 'std', 'mean', 'ewm', 'oppt', 'gmOutcome'
        and 'delta', by ``n`` for 'teammates', and not at all for 'teamLoc'.
    """
    # initialize
    featureVec = np.array([])
    num_hsvt_features = 0

    for feature, featureParams in featuresDict.items():
        # standard deviation of window values (0 for an empty window)
        if feature == 'std':
            window = featureParams['window']

            datesWindow = getDatesWindow(dates, i, window)
            windowVals = getWindowVals(dfPlayer, datesWindow, metric)
            featureVal = np.std(windowVals) if windowVals.size else 0

            featureVec = np.append(featureVec, featureVal)
            num_hsvt_features += 1

        # mean of window values (0 for an empty window)
        elif feature == 'mean':
            window = featureParams['window']

            datesWindow = getDatesWindow(dates, i, window)
            windowVals = getWindowVals(dfPlayer, datesWindow, metric)
            featureVal = np.mean(windowVals) if windowVals.size else 0

            featureVec = np.append(featureVec, featureVal)
            num_hsvt_features += 1

        # most recent value after applying ewm on window values
        elif feature == 'ewm':
            window = featureParams['window']
            com = featureParams['com']

            # forward the caller's metric rather than a fixed column name,
            # so this feature follows the same metric as 'std' and 'mean'
            ewm_windowVals = getEWMGamePerf(dfPlayer, dates, i, window,
                                            com=com, metric=metric)
            featureVal = ewm_windowVals[-1]

            featureVec = np.append(featureVec, featureVal)
            num_hsvt_features += 1

        # opponent information
        elif feature == 'oppt':
            window = featureParams['window']

            # opponent on game date
            currDate = dates[i]
            oppTeam = getOppTeam(dfPlayer, currDate)

            # ratio of the opponent's value to the league-wide value
            oppTeamOppPerf = getTeamOppPerf(teamsDict, oppTeam, currDate, window)
            leagueOppPerf = getLeagueOppPerf(teamsDict, currDate, window)
            # if leagueOppPerf = 0 (no games played yet) then set featureVal = 1
            featureVal = oppTeamOppPerf / leagueOppPerf if leagueOppPerf else 1

            featureVec = np.append(featureVec, featureVal)
            num_hsvt_features += 1

        # most recent game outcome after applying ewm on window outcomes
        elif feature == 'gmOutcome':
            window = featureParams['window']
            com = featureParams['com']

            datesWindow = getDatesWindow(dates, i, window)
            gmOutcomes = getGameOutcomes(dfPlayer, datesWindow)
            gmOutcomes = predictionMethods.applyEWMA(pd.Series(gmOutcomes),
                                                     param=com).values
            featureVal = gmOutcomes[-1]

            featureVec = np.append(featureVec, featureVal)
            num_hsvt_features += 1

        # teammate information
        elif feature == 'teammates':
            window = featureParams['window']
            com = featureParams['com']
            n = featureParams['n']

            # current date and player
            currDate = dates[i]
            player = dfPlayer.Player.values[0]

            # current team and teammates
            team = getTeam(dfPlayer, currDate)
            teammates = getTeammates(dfLeague, player, team, currDate)
            featureVal = topNTeammatesPerf(dfLeague, teammates, currDate,
                                           window, n, com, metric)

            featureVec = np.append(featureVec, featureVal)
            num_hsvt_features += n

        # difference between the two games preceding dates[i]
        elif feature == 'delta':
            # look both games up with the caller's metric so the difference
            # is taken on the same quantity as the other features
            prevGmPerf = getGamePerf(dfPlayer, dates[i - 1], metric)
            prev_prevGmPerf = getGamePerf(dfPlayer, dates[i - 2], metric)
            featureVal = prevGmPerf - prev_prevGmPerf

            featureVec = np.append(featureVec, featureVal)
            num_hsvt_features += 1

        # team location on game date (not counted in num_hsvt_features)
        elif feature == 'teamLoc':
            currDate = dates[i]
            featureVal = getTeamLoc(dfPlayer, currDate)

            featureVec = np.append(featureVec, featureVal)

    return featureVec, num_hsvt_features