def main():
    """Parse both no-handicap datasets and run going_class on the training splits.

    The horses of each dataset are split with split_dataset; going_class is
    then called on the born05 training split first and the born98 training
    split second.
    """
    born98_csv = './../Data/born98.csv'
    born05_csv = './../Data/born05.csv'

    horses98 = HorseParserNoHandicaps(born98_csv).horses
    horses05 = HorseParserNoHandicaps(born05_csv).horses

    # The race lists are parsed but not consumed by anything below.
    races98 = RaceParserNoHandicaps(born98_csv).races
    races05 = RaceParserNoHandicaps(born05_csv).races

    # split_dataset returns a (train, test) pair; only the training part is
    # needed here, so the test part is discarded.
    horses_train_98, _ = split_dataset(horses98)
    horses_train_05, _ = split_dataset(horses05)

    for training_set in (horses_train_05, horses_train_98):
        going_class(training_set)
def main():
    """Print the size of both no-handicap datasets and run race_class_records.

    Only race_class_records is currently enabled; the other analyses are kept
    as commented-out calls so they can be switched back on individually.
    """
    horses98 = HorseParserNoHandicaps('./../Data/born98.csv').horses
    horses05 = HorseParserNoHandicaps('./../Data/born05.csv').horses

    races98 = RaceParserNoHandicaps('./../Data/born98.csv').races
    races05 = RaceParserNoHandicaps('./../Data/born05.csv').races

    print('HorsesBorn98 Training Set:')
    print('No. of horses: ' + str(len(horses98)))
    print('No. of races: ' + str(len(races98)))

    print('HorsesBorn05 Training Set:')
    print('No. of horses: ' + str(len(horses05)))
    print('No. of races: ' + str(len(races05)))

    # Disabled dataset-level analyses:
    # average_race_speeds(races98, races05)
    # average_horse_speeds(horses98, horses05)
    # horses_age_record_no(horses98, horses05)
    # race_distances(races98, races05)
    # race_records_per_horse(horses98, horses05)
    race_class_records(races98, races05)

    # Disabled *_vs_speed analyses on the born98 horses (previously switched
    # off by wrapping them in a bare string literal):
    # rating_vs_speed(horses98)
    # prize_money_vs_speed(horses98)
    # odds_vs_speed(horses98)
    # age_vs_speed(horses98)
    # no_of_runners_vs_speed(horses98)
    # race_class_vs_speed(horses98)
    # weight_vs_speed(horses98)
    # jockeys_claim_vs_speed(horses98)
    # place_vs_speed(horses98)
    # distance_vs_speed(horses98)
    # comptime_vs_speed(horses98)
    # stall_vs_speed(horses98)
    # goings_vs_speed(horses98)
    print('')

    # average_race_speeds(races05)
    # average_horse_speeds(horses05)
    # NOTE: the function used to end with an unterminated ''' here, which
    # turned everything after it into a string literal; it has been removed.
def _build_xy_lr(horses):
    """Return (X, y) lists built from compute_vector for every horse.

    compute_vector is unpacked as a (vector, target) pair; vectors go to X
    and targets to y, in the order of *horses*.
    """
    pairs = [compute_vector(horse) for horse in horses]
    return [vec for vec, _ in pairs], [target for _, target in pairs]


def _fit_and_report_lr(horses):
    """Split *horses*, fit a LinearRegression and print test-set metrics."""
    horses_train, horses_test = split_dataset(horses)

    X_train, y_train = _build_xy_lr(horses_train)
    print('No. of instances in training set:')
    print(len(X_train))
    print(len(y_train))
    print('')

    X_test, y_test = _build_xy_lr(horses_test)
    print('No. of instances in testing set:')
    print(len(X_test))
    print(len(y_test))
    print('')

    # Create the linear regression object and train it on the training set.
    regr = linear_model.LinearRegression()
    regr.fit(X_train, y_train)

    print('Coefficients:')
    print(regr.coef_)
    print('')

    # For scikit-learn regressors score() is the coefficient of
    # determination (R^2): 1 is a perfect prediction.
    print('Variance score:')
    print(regr.score(X_test, y_test))
    print('')

    # Predict once and reuse the result for every metric below.
    y_pred = regr.predict(X_test)

    print('Mean absolute error:')
    print(mean_absolute_error(y_test, y_pred))
    print('')

    print('Explained variance:')
    print(explained_variance_score(y_test, y_pred))
    print('')

    print('Mean squared error:')
    print(mean_squared_error(y_test, y_pred))
    print('')

    print('R2 score:')
    print(r2_score(y_test, y_pred))
    print('')


def main():
    """Fit and evaluate a linear regression on each no-handicap dataset.

    The born98 horses are processed first, then the born05 horses; each
    dataset gets its own train/test split and its own model.
    """
    horses98 = HorseParserNoHandicaps('./../Data/born98.csv').horses
    horses05 = HorseParserNoHandicaps('./../Data/born05.csv').horses

    # NOTE(review): the race lists are parsed but never used below; the calls
    # are kept so that any work done by the parsers is unchanged.
    races98 = RaceParserNoHandicaps('./../Data/born98.csv').races
    races05 = RaceParserNoHandicaps('./../Data/born05.csv').races

    # HorsesBorn98 Dataset
    _fit_and_report_lr(horses98)

    # HorsesBorn05 Dataset
    _fit_and_report_lr(horses05)
def _print_no_handicap_report(heading, horses, races, complete_races,
                              winner_races, mean_races_per_horse,
                              races_by_age, missing_runner_counts):
    """Print one dataset's statistics section.

    The counts passed in are printed as given; the trainer/jockey counts and
    the going/class summaries are obtained by calling the corresponding
    helpers on *horses* and *races* while printing.
    """
    race_total = len(races)

    print(heading)
    print('No. of horses: ' + str(len(horses)))
    print('No. of races: ' + str(race_total))
    print('No. of races for which we have all the horses: ' + str(complete_races))
    print('No. of races for which we have the winner: ' + str(winner_races))
    print('Fraction of races for which we have all the horses: ' + str(float(complete_races) / race_total))
    print('Fraction of races for which we have the winner: ' + str(float(winner_races) / race_total))
    print('Average no. of races per horse: ' + str(mean_races_per_horse))
    print('No. of races for horses at each age: ' + str(races_by_age))
    print('No. of races with k-missing horse records: ' + str(missing_runner_counts))
    print('No. of different trainers: ' + str(no_of_trainers(horses)))
    print('No. of different jockeys: ' + str(no_of_jockeys(horses)))
    print('Going and Speed: ')
    going_average_speeds(races)
    print('Race Class and Speed:')
    class_average_speeds(races)
    print('Race Class and Distance:')
    class_average_distances(races)
    print('Race Class and Prize:')
    class_average_prizes(races)


def main():
    """Print summary statistics for both datasets with handicap races excluded.

    All statistics for both datasets are computed first; the born98 section
    is then printed, followed by an empty line and the born05 section.
    """
    horse_src_98 = HorseParserNoHandicaps('./../Data/born98.csv')
    horse_src_05 = HorseParserNoHandicaps('./../Data/born05.csv')

    race_src_98 = RaceParserNoHandicaps('./../Data/born98.csv')
    race_src_05 = RaceParserNoHandicaps('./../Data/born05.csv')

    horses98 = horse_src_98.horses
    horses05 = horse_src_05.horses

    races98 = race_src_98.races
    races05 = race_src_05.races

    # Results of get_full_races feed the per-age statistics below.
    complete_98 = get_full_races(races98)
    complete_05 = get_full_races(races05)

    n_complete_98 = no_of_races_with_all_horses(races98)
    n_winner_98 = races_with_winning_horse(races98)

    n_complete_05 = no_of_races_with_all_horses(races05)
    n_winner_05 = races_with_winning_horse(races05)

    mean_per_horse_98 = average_no_of_races_per_horse(horses98)
    mean_per_horse_05 = average_no_of_races_per_horse(horses05)

    age_values_98 = get_ages(complete_98)
    age_values_05 = get_ages(complete_05)

    by_age_98 = races_at_each_age(complete_98, age_values_98)
    by_age_05 = races_at_each_age(complete_05, age_values_05)

    k_missing_98 = races_with_k_missing_runners(races98)
    k_missing_05 = races_with_k_missing_runners(races05)

    _print_no_handicap_report(
        'born98.csv file statistics - without handicap races:',
        horses98, races98, n_complete_98, n_winner_98,
        mean_per_horse_98, by_age_98, k_missing_98)

    print('')

    _print_no_handicap_report(
        'born05.csv file statistics - without handicap races:',
        horses05, races05, n_complete_05, n_winner_05,
        mean_per_horse_05, by_age_05, k_missing_05)
# Example #5
# 0
def main():
    """Print summary statistics for both datasets, handicap races included.

    For each of born98.csv and born05.csv this prints the number of horses
    and races, how many races have all runners / the winner present, the
    per-horse and per-age race counts, the parsers' comptime_missing and
    irish_races counters, and the number of races left once handicap races
    are excluded.
    """
    horse_parser_98 = HorseParser('./../Data/born98.csv')
    horse_parser_05 = HorseParser('./../Data/born05.csv')

    race_parser_98 = RaceParser('./../Data/born98.csv')
    race_parser_05 = RaceParser('./../Data/born05.csv')

    # NOTE(review): the no-handicap horse parsers are not used below; the
    # calls are kept so that any work done by the parsers is unchanged.
    horse_parser_no_handicaps_98 = HorseParserNoHandicaps('./../Data/born98.csv')
    horse_parser_no_handicaps_05 = HorseParserNoHandicaps('./../Data/born05.csv')

    race_parser_no_handicaps_98 = RaceParserNoHandicaps('./../Data/born98.csv')
    race_parser_no_handicaps_05 = RaceParserNoHandicaps('./../Data/born05.csv')

    horses98 = horse_parser_98.horses
    horses05 = horse_parser_05.horses

    races98 = race_parser_98.races
    races05 = race_parser_05.races

    full_races_98 = get_full_races(races98)
    full_races_05 = get_full_races(races05)

    total_races_with_all_horses_98 = no_of_races_with_all_horses(races98)
    total_races_with_winners_98 = no_of_races_with_winner(races98, horses98)

    total_races_with_all_horses_05 = no_of_races_with_all_horses(races05)
    total_races_with_winners_05 = no_of_races_with_winner(races05, horses05)

    average_races_per_horse_98 = average_no_of_races_per_horse(horses98)
    average_races_per_horse_05 = average_no_of_races_per_horse(horses05)

    ages98 = get_ages(full_races_98)
    ages05 = get_ages(full_races_05)

    no_of_races_per_age_98 = races_at_each_age(full_races_98, ages98)
    no_of_races_per_age_05 = races_at_each_age(full_races_05, ages05)

    races_with_k_missing_horses_98 = races_with_k_missing_runners(races98)
    races_with_k_missing_horses_05 = races_with_k_missing_runners(races05)

    print('born98.csv file statistics:')
    print('No. of horses: ' + str(len(horses98)))
    print('No. of races: ' + str(len(races98)))
    print('No. of races for which we have all the horses: ' + str(total_races_with_all_horses_98))
    print('No. of races for which we have the winner: ' + str(total_races_with_winners_98))
    print('Fraction of races for which we have all the horses: ' + str(float(total_races_with_all_horses_98) / len(races98)))
    print('Fraction of races for which we have the winner: ' + str(float(total_races_with_winners_98) / len(races98)))
    print('Average no. of races per horse: ' + str(average_races_per_horse_98))
    print('No. of races for horses at each age: ' + str(no_of_races_per_age_98))
    print('No. of races with k-missing horse records: ' + str(races_with_k_missing_horses_98))
    print('No. of horse records with comptime missing: ' + str(horse_parser_98.comptime_missing))
    print('No. of race records with comptime missing: ' + str(race_parser_98.comptime_missing))
    print('No. of horse records with Irish race class: ' + str(horse_parser_98.irish_races))
    print('No. of race records with Irish race class ' + str(race_parser_98.irish_races))
    # This line used to append an empty str() placeholder, so no number was
    # ever shown; report the race count from the no-handicap parser instead.
    print('No. of races without handicap races: ' + str(len(race_parser_no_handicaps_98.races)))

    print('')

    print('born05.csv file statistics:')
    print('No. of horses: ' + str(len(horses05)))
    print('No. of races: ' + str(len(races05)))
    print('No. of races for which we have all the horses: ' + str(total_races_with_all_horses_05))
    print('No. of races for which we have the winner: ' + str(total_races_with_winners_05))
    print('Fraction of races for which we have all the horses: ' + str(float(total_races_with_all_horses_05) / len(races05)))
    print('Fraction of races for which we have the winner: ' + str(float(total_races_with_winners_05) / len(races05)))
    print('Average no. of races per horse: ' + str(average_races_per_horse_05))
    print('No. of races for horses at each age: ' + str(no_of_races_per_age_05))
    print('No. of races with k-missing horse records: ' + str(races_with_k_missing_horses_05))
    print('No. of horse records with comptime missing: ' + str(horse_parser_05.comptime_missing))
    print('No. of race records with comptime missing: ' + str(race_parser_05.comptime_missing))
    print('No. of horse records with Irish race class: ' + str(horse_parser_05.irish_races))
    print('No. of race records with Irish race class ' + str(race_parser_05.irish_races))
    # Same no-handicap count as in the born98 section, for symmetry.
    print('No. of races without handicap races: ' + str(len(race_parser_no_handicaps_05.races)))
def _build_xy_cv(horses):
    """Return (X, y) lists built from compute_vector for every horse.

    compute_vector is unpacked as a (vector, target) pair; vectors go to X
    and targets to y, in the order of *horses*.
    """
    pairs = [compute_vector(horse) for horse in horses]
    return [vec for vec, _ in pairs], [target for _, target in pairs]


def _cross_validate_and_report(title, horses):
    """Cross-validate, fit and evaluate a linear regression on *horses*.

    Prints *title*, the train/test set sizes, the 5-fold cross-validation
    scores on the training set with their mean and standard deviation, the
    fitted coefficients and intercept, and the MSE / MAE on the test set.
    """
    print(title)
    horses_train, horses_test = split_dataset(horses)

    X_train, y_train = _build_xy_cv(horses_train)
    print('No. of instances in training set:')
    print(len(X_train))
    print(len(y_train))
    print('')

    X_test, y_test = _build_xy_cv(horses_test)
    print('No. of instances in testing set:')
    print(len(X_test))
    print(len(y_test))
    print('')

    # Convert once; the same arrays feed cross-validation and the final fit.
    X_train_arr = np.array(X_train)
    y_train_arr = np.array(y_train)

    # Create linear regression object
    regr = linear_model.LinearRegression(fit_intercept=True)

    # Cross-validation on the training set only
    cv_scores = cross_validation.cross_val_score(
        regr,
        X_train_arr,
        y_train_arr,
        scoring='mean_squared_error',
        cv=5)

    print('5-fold CV scores using MSE:')
    print(cv_scores)
    print('')

    # Mean and SD of the fold scores. The SD line used to print std() * 2
    # under the "SD" label; it now prints the standard deviation itself.
    print('Mean of scores: ' + str(cv_scores.mean()))
    print('SD of scores: ' + str(cv_scores.std()))
    print('')

    # Train the model using the full training set
    regr.fit(X_train_arr, y_train_arr)

    print('Coefficients:')
    print(regr.coef_)
    print('')

    print('Intercept: ')
    print(regr.intercept_)
    print('')

    # Predict using the testing set
    y_pred = regr.predict(X_test)

    print('Mean squared error:')
    print(mean_squared_error(y_test, y_pred))
    print('')

    print('Mean absolute error:')
    print(mean_absolute_error(y_test, y_pred))
    print('')


def main():
    """Run the cross-validated linear regression on both no-handicap datasets.

    The born98 horses are processed first, then the born05 horses; each
    dataset gets its own train/test split and its own model.
    """
    horses98 = HorseParserNoHandicaps('./../Data/born98.csv').horses
    horses05 = HorseParserNoHandicaps('./../Data/born05.csv').horses

    # NOTE(review): the race lists are parsed but never used below; the calls
    # are kept so that any work done by the parsers is unchanged.
    races98 = RaceParserNoHandicaps('./../Data/born98.csv').races
    races05 = RaceParserNoHandicaps('./../Data/born05.csv').races

    _cross_validate_and_report(' HorsesBorn98 Dataset ', horses98)
    _cross_validate_and_report(' HorsesBorn05 Dataset ', horses05)
def main():
    """Train a linear-kernel SVR on the born98 no-handicap horses and report.

    The born98 horses are split with split_dataset, turned into feature
    vectors and targets with compute_vector, used to fit an SVR, and the
    test-set metrics are printed.

    NOTE(review): horses05, races98 and races05 are parsed but not used in
    the part of the function visible here.
    """
    horses98 = HorseParserNoHandicaps('./../Data/born98.csv').horses
    horses05 = HorseParserNoHandicaps('./../Data/born05.csv').horses

    races98 = RaceParserNoHandicaps('./../Data/born98.csv').races
    races05 = RaceParserNoHandicaps('./../Data/born05.csv').races

    print 'HorsesBorn98 Dataset'
    horses_train_98, horses_test_98 = split_dataset(horses98)

    # Build the training matrix: compute_vector yields a (vector, target)
    # pair for each horse.
    horses_98_X_train = []
    horses_98_y_train = []
    for h in horses_train_98:
        v, s = compute_vector(h)
        horses_98_X_train.append(v)
        horses_98_y_train.append(s)

    # Both lengths are printed so a mismatch between X and y is visible.
    print 'No. of instances in training set:'
    print len(horses_98_X_train)
    print len(horses_98_y_train)
    print ''

    # Build the testing matrix the same way.
    horses_98_X_test = []
    horses_98_y_test = []
    for h in horses_test_98:
        v, s = compute_vector(h)
        horses_98_X_test.append(v)
        horses_98_y_test.append(s)

    print 'No. of instances in testing set:'
    print len(horses_98_X_test)
    print len(horses_98_y_test)
    print ''

    print 'Create SVR object'
    # Create the SVR object with a linear kernel and C=1e3 (a gamma setting
    # was tried previously and is left disabled in the trailing comment).
    svr98 = SVR(kernel='linear', C=1e3)  #, gamma=0.1)

    print 'Training SVR'
    # Train the model using the training sets
    svr98.fit(horses_98_X_train, horses_98_y_train)

    print 'Predicting'
    # Predict once; the result is reused by the metrics below.
    horses_98_y_pred = svr98.predict(horses_98_X_test)

    # For scikit-learn regressors score() is the coefficient of
    # determination (R^2): 1 is a perfect prediction. It is therefore the
    # same quantity as the 'R2 score' printed further down.
    print 'Variance score:'
    print svr98.score(horses_98_X_test, horses_98_y_test)
    print ''

    print 'Mean absolute error:'
    print mean_absolute_error(horses_98_y_test, horses_98_y_pred)
    print ''

    print 'Explained variance:'
    print explained_variance_score(horses_98_y_test, horses_98_y_pred)
    print ''

    print 'Mean squared error:'
    print mean_squared_error(horses_98_y_test, horses_98_y_pred)
    print ''

    print 'R2 score:'
    print r2_score(horses_98_y_test, horses_98_y_pred)
    print ''