Пример #1
0
def CC_CLS_CC(Dataframe, HNAME_List, Raceday):

    """
    Horse's Competitive Class

    Today's class is the mean of the three highest HJRAT values in Dataframe.
    For every earlier race (RADAT < Raceday) in which a horse of HNAME_List has
    RESFP <= 3, the class of that race is the mean of the three highest HJRAT
    values among all of its runners. A horse's feature is today's class minus
    the average class of those races; horses without such a race get 0.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, must contain the columns HNAME and HJRAT
    HNAME_List : String of List of Horse Names, placed verbatim after `in` in the SQL
    Raceday : Date of Race, placed verbatim into the SQL
    Return
    ------
    Dataframe [HNAME, CC_CLS_CC]
    """

    Feature_DF = Dataframe.loc[:, ['HNAME', 'HJRAT']]
    # Select the rating column before averaging so that the string HNAME column
    # never reaches mean() (which raises on non-numeric data in pandas >= 2.0).
    Underlying_Class = Feature_DF['HJRAT'].nlargest(3).mean()

    # NOTE(review): Raceday and HNAME_List are interpolated straight into the SQL
    # text; they are assumed to be pre-formatted, trusted values - confirm at caller.
    Races = Extraction_Database("""
                                Select HNAME, RARID, HJRAT CC_CLS_CC from RaceDb
                                where RARID in (
                                Select RARID from RaceDb
                                where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP <= 3)
                                """.format(Raceday = Raceday, HNAME_List = HNAME_List))
    # Class of each past race : mean of its three highest ratings
    Races_AvgHJRAT = Races.groupby('RARID')['CC_CLS_CC'].apply(lambda x: x.nlargest(3).mean())
    Races_AvgHJRAT = Races_AvgHJRAT.reset_index()
    Race_IDs = Extraction_Database("""
                                   Select HNAME, RARID from RaceDb
                                   where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP <= 3
                                   """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    # Attach the race class to every (horse, race) pair, then average per horse
    Merged = Race_IDs.merge(Races_AvgHJRAT, on='RARID')
    Horse_Class = Merged.groupby('HNAME')['CC_CLS_CC'].mean().reset_index()
    Feature_DF = Feature_DF.merge(Horse_Class, on='HNAME', how='left')

    # Assign the filled result back instead of calling fillna(inplace=True) on a
    # .loc selection, which does not reliably write through to the frame.
    Past_Class = Feature_DF['CC_CLS_CC'].fillna(Underlying_Class)
    Feature_DF['CC_CLS_CC'] = (Underlying_Class - Past_Class).fillna(0)
    Feature_DF = Feature_DF.loc[:, ['HNAME', 'CC_CLS_CC']]

    return Feature_DF
Пример #2
0
def CC_WEI_EXP(Dataframe, HNAME_List, Raceday):

    """
    Weight Carrying Experience of Horse

    Primary value : highest BEYER_SPEED of the horse over its earlier races
    (RADAT < Raceday) with HWEIC >= today's HWEIC and RADIS >= today's RADIS.
    Back up value : mean BEYER_SPEED of the horse over the earlier races with
    RADIS >= today's RADIS in which it carried its highest HWEIC.
    The feature is the primary value, else the back up value, else the lowest
    feature value among today's horses, else 0.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, must contain the columns HNAME, HWEIC and RADIS
    HNAME_List : String of List of Horse Names, placed verbatim after `in` in the SQL
    Raceday : Date of Race, placed verbatim into the SQL
    Return
    ------
    Dataframe [HNAME, CC_WEI_EXP]
    """

    Feature_DF = Dataframe.loc[:, ['HNAME', 'HWEIC']]
    # Today's distance is read from the first row of the Matchday Dataframe
    Distance = Dataframe.loc[:, 'RADIS'].values[0]

    # One "(name and weight)" condition per horse. Single quotes inside a name are
    # doubled so that names containing an apostrophe still give valid SQL.
    Horse_Weight_Req = ' or '.join(
        "(HNAME = '" + Name.replace("'", "''") + "' and HWEIC >= " + str(Weight) + ')'
        for Name, Weight in zip(Feature_DF['HNAME'], Feature_DF['HWEIC']))

    # NOTE(review): Raceday and HNAME_List are interpolated straight into the SQL
    # text; they are assumed to be pre-formatted, trusted values - confirm at caller.
    Races_above_today = Extraction_Database("""
                                            Select HNAME, RARID from
                                            (Select HNAME, RARID, HWEIC, RADAT, RADIS from RaceDb
                                            where {Horse_Weight_Req})
                                            where RADAT < {Raceday} and RADIS >= {Distance}
                                            """.format(Raceday = Raceday, Horse_Weight_Req = Horse_Weight_Req, Distance=Distance))
    Race_ID_List = '('+str(Races_above_today['RARID'].tolist())[1:-1]+')'
    Speed_Ratings_tdy = Extraction_Database("""
                                            Select HNAME, RARID, BEYER_SPEED from Race_PosteriorDb
                                            where RARID in {Race_ID_List} and HNAME in {HNAME_List}
                                            """.format(Race_ID_List=Race_ID_List, HNAME_List=HNAME_List))
    Best_Speed_Rating = Races_above_today.merge(Speed_Ratings_tdy, on=['HNAME', 'RARID'], how='left')
    # Aggregate only the rating column; the other columns are not needed
    Best_Speed_Rating = Best_Speed_Rating.groupby('HNAME')['BEYER_SPEED'].max().reset_index()
    Best_Speed_Rating = Best_Speed_Rating.rename(columns={'BEYER_SPEED': 'Primary'})

    Race_heaviest = Extraction_Database("""
                                        Select HNAME, RARID, HWEIC from RaceDb
                                        where RADAT < {Raceday} and HNAME in {HNAME_List} and RADIS >= {Distance}
                                        """.format(Raceday = Raceday, HNAME_List = HNAME_List, Distance=Distance))
    Race_ID_List = '('+str(Race_heaviest['RARID'].tolist())[1:-1]+')'
    Speed_Ratings_heavy = Extraction_Database("""
                                              Select HNAME, RARID, BEYER_SPEED from Race_PosteriorDb
                                              where RARID in {Race_ID_List} and HNAME in {HNAME_List}
                                              """.format(Race_ID_List=Race_ID_List, HNAME_List=HNAME_List))
    Backup_Speed_Rating = Race_heaviest.merge(Speed_Ratings_heavy, on=['HNAME','RARID'], how='left')

    # No earlier race over the distance for any horse : every horse gets 0
    if len(Backup_Speed_Rating) == 0:
        Feature_DF.loc[:,'CC_WEI_EXP'] = 0
        Feature_DF = Feature_DF.loc[:,['HNAME','CC_WEI_EXP']]
        return Feature_DF

    # Missing ratings take the lowest known rating, or 0 when none is known.
    # The result is assigned back instead of using fillna(inplace=True) on a
    # .loc selection, which does not reliably write through to the frame.
    Beyer = Backup_Speed_Rating['BEYER_SPEED']
    Backup_Speed_Rating['BEYER_SPEED'] = Beyer.fillna(Beyer.min()).fillna(0)

    # Keep, for each horse, only the races in which it carried its highest weight
    idx = Backup_Speed_Rating.groupby('HNAME')['HWEIC'].transform('max') == Backup_Speed_Rating['HWEIC']
    Backup_Speed_Rating = Backup_Speed_Rating[idx].groupby('HNAME')['BEYER_SPEED'].mean().reset_index()
    Backup_Speed_Rating = Backup_Speed_Rating.rename(columns={'BEYER_SPEED': 'Back_Up'})

    Feature_DF = Feature_DF.merge(Best_Speed_Rating, on='HNAME', how='left')
    Feature_DF = Feature_DF.merge(Backup_Speed_Rating, on='HNAME', how='left')
    Weight_Exp = Feature_DF['Primary'].fillna(Feature_DF['Back_Up'])
    Feature_DF['CC_WEI_EXP'] = Weight_Exp.fillna(Weight_Exp.min()).fillna(0)
    Feature_DF = Feature_DF.loc[:,['HNAME','CC_WEI_EXP']]

    return Feature_DF
Пример #3
0
def Dataset_Extraction(Race_ID_List):
    """
    Function for Extracting Datasets from FeatureDb
    Feature Set : Data used for Feature Engineering
    Modelling Set : Data used for training base models and Hyperparameter Selection
    Ensemble Set : Data used for training Ensemble Model
    Testing Set : Testing Final Model
    X Dataset for all Sets should be the same
    They should go through the same Preprocessing Pipeline

    Only races that have a FeatureDb row with CC_FRB = 0 are extracted.
    Both outputs are ordered by RARID, HNAME when queried.

    Parameter
    --------
    Race_ID_List : pd.Dataframe with a RARID column holding the requested Race IDs
    Return
    ------
    X_Dataset : Dataframe of RARID, HNAME, RADIS, RALOC, RATRA from RaceDb merged
                with the FeatureDb columns (every FeatureDb column after the
                first two is cast to float)
    Y_Dataset : Dataframe [RARID, HNAME, RESFO, RESWL, RESFP, ODPLA] with the
                result columns cast to float
    """
    #Start Timer (perf_counter is the clock intended for measuring elapsed time)
    start_time = time.perf_counter()

    #Select Race ID where CC_FRB = 0 among the requested races
    Race_ID_List = Extraction_Database("""
                                       Select Distinct RARID from FeatureDb
                                       where RARID in {RARID} and CC_FRB = 0
                                       """.format(
        RARID='(' + str(Race_ID_List['RARID'].tolist())[1:-1] + ')'))

    #SQL "in" list of the retained Race IDs, built once and shared by all queries below
    RARID = '(' + str(Race_ID_List['RARID'].tolist())[1:-1] + ')'

    #Constructing X_Dataset
    #Get Features of the retained races
    X_Dataset = Extraction_Database("""
                                     Select * from FeatureDb where RARID in {RARID}
                                     Order By RARID, HNAME
                                     """.format(RARID=RARID))

    #Convert all features into floats
    #assumes the first two FeatureDb columns are the RARID / HNAME keys - TODO confirm
    X_Dataset = X_Dataset.astype({col: float for col in X_Dataset.columns[2:]})

    #Get RADIS, RALOC, RATRA
    X_Condition = Extraction_Database("""
                                      Select RARID, HNAME, RADIS, RALOC, RATRA from RaceDb where RARID in {RARID}
                                      Order by RARID, HNAME
                                      """.format(RARID=RARID))
    #Merging Dataset
    X_Dataset = X_Condition.merge(X_Dataset, on=['HNAME', 'RARID'])

    #Constructing Y_Dataset
    Y_Dataset = Extraction_Database("""
                                    Select RARID, HNAME, RESFO, RESWL, RESFP, ODPLA
                                     from RaceDb where RARID in {RARID}
                                     Order By RARID, HNAME
                                    """.format(RARID=RARID))
    #Convert all result columns into floats (the first two columns are the keys)
    Y_Dataset = Y_Dataset.astype({col: float for col in Y_Dataset.columns[2:]})

    #Print Time Taken to Load
    print("---- %s Races are Extracted from FeatureDb in %s seconds ----" %
          (len(Race_ID_List), (str(round((time.perf_counter() - start_time), 4)))))

    return X_Dataset, Y_Dataset