Exemplo n.º 1
0
def testMethods(nonRookieData=None,rookieData=None,careerData=None):
    """
    Run the cross-validation routines for both player groups.

    Inputs:
        nonRookieData,rookieData,careerData - optional (default = None)

            Any falsy input is regenerated: careerData comes from
            getData.main(), and the two feature sets (together with a
            refreshed careerData) come from getFeatures.main(careerData).
            If either feature set is missing, both are rebuilt.

    Outputs:
        (resultsNonRookies,resultsRookies,predsNonRookies,predsRookies)

            The results are whatever writeResToPandas returns for each
            group; the predictions are returned exactly as produced by
            getCrossVal / getRookieCrossVal.
    """
    if not careerData:
        # Only the career data is needed from the raw data loader.
        _, careerData, _ = getData.main()

    if not nonRookieData or not rookieData:
        nonRookieData, rookieData, careerData = getFeatures.main(careerData)

    # Cross-validate each group first, then convert the raw results.
    vetScores, vetPreds = getCrossVal(nonRookieData, careerData)
    newScores, newPreds = getRookieCrossVal(rookieData, careerData)

    vetScores = writeResToPandas(vetScores, 'nonRookies')
    newScores = writeResToPandas(newScores, 'rookies')

    return (vetScores, newScores, vetPreds, newPreds)
Exemplo n.º 2
0
def predictNextSeason(year=2015,nonRookieData='nonRookieData.p',rookieData='rookieData.p',careerData='careerData.p',seasonStats='seasonStats.p'):
    """Fit one model per player group and write next-season predictions to csv.

        According to the original notes, the previously selected algorithms are:
            -Veterans: Random Forest; n_estimators=500, min_samples_split=125
            -Novices: SVM; C=.15, gamma = .015, epsilon= .05
        The models themselves are built by getModel (not visible here).

        Inputs:
            year - int, optional (default=2015)

                Used for year arithmetic and as the key into seasonStats,
                so it must be an integer. year=2015 yields the season
                label '2014-15'.

            nonRookieData,rookieData,careerData,seasonStats - optional
                (defaults are the pickle file names 'nonRookieData.p',
                'rookieData.p', 'careerData.p', 'seasonStats.p')

                All four are passed through tryPickle before use
                (presumably this loads any value given as a pickle file
                name -- confirm against tryPickle). If careerData or
                seasonStats is falsy (e.g. None), both are regenerated
                with getData.main(); if nonRookieData or rookieData is
                falsy, the feature sets are regenerated with
                getFeatures.main(careerData). With the default file names
                nothing is regenerated.

        Outputs:
            predictionsNonRookies,predictionsRookies - Pandas Dataframe

                The dataframes containing the predictions for both groups

        Side effects:
            Writes both prediction frames, concatenated, to
            '<season>_Predictions.csv' (without the index) and prints
            progress/timing messages.
    """
    # Local import keeps this block self-contained; DataFrame.append was
    # removed in pandas 2.0, so pd.concat is used to combine the frames.
    import pandas as pd

    t0=time.time()
    # Season label spans the previous year and the last two digits of `year`.
    season = str(year-1)+'-'+str(year)[-2:]
    if not (careerData and seasonStats):
        seasonStats,careerData,_ = getData.main()
    if not (nonRookieData and rookieData):
        nonRookieData,rookieData,careerData=getFeatures.main(careerData)
    nonRookieData,rookieData,careerData,seasonStats=tryPickle(nonRookieData,rookieData,careerData,seasonStats)
    print('All past data found! Now fitting models ',time.time()-t0)
    nonRookiesModel,nonRookiesTrain,nonRookiesScaler = getModel(nonRookieData,'nonRookies')
    rookiesModel,rookiesTrain,rookiesScaler = getModel(rookieData,'rookies')
    print('Models fitted! Now getting all current players features ',time.time()-t0)
    nonRookies,rookies = findPlayerFeatures(year,seasonStats[year],careerData,
        nonRookiesTrain,rookiesTrain,nonRookiesScaler,rookiesScaler)
    print('Features found! Now making predictions ',time.time()-t0)
    predictionsNonRookies = getPredictions(nonRookies,nonRookiesModel,'nonRookies')
    print('Non-Rookie Predictions made! Now predicting Rookies ',time.time()-t0)
    predictionsRookies = getPredictions(rookies,rookiesModel,'rookies')
    # Same row order as the old DataFrame.append: non-rookies first, then rookies.
    allPredictions = pd.concat([predictionsNonRookies,predictionsRookies])
    allPredictions.to_csv(season+'_Predictions.csv',index=False)
    print('Total Runtime is ',time.time()-t0,'s')
    return(predictionsNonRookies,predictionsRookies)