def main():
    """Run ``going_class`` on the first split of each horse dataset.

    Parses born98.csv and born05.csv with the no-handicap horse and race
    parsers, splits each horse collection with ``split_dataset`` and hands
    the first element of each split to ``going_class`` -- the born05 one
    first, then the born98 one.
    """
    path_98 = './../Data/born98.csv'
    path_05 = './../Data/born05.csv'

    horses98 = HorseParserNoHandicaps(path_98).horses
    horses05 = HorseParserNoHandicaps(path_05).horses
    # The race collections are parsed here but not read further down.
    races98 = RaceParserNoHandicaps(path_98).races
    races05 = RaceParserNoHandicaps(path_05).races

    # Only the first element of each split is used in this variant.
    train_98, _held_out_98 = split_dataset(horses98)
    train_05, _held_out_05 = split_dataset(horses05)

    for training_horses in (train_05, train_98):
        going_class(training_horses)
def main():
    """Print horse/race counts for both datasets and run the class report.

    Parses born98.csv and born05.csv with the no-handicap parsers, prints
    how many horses and races each one yielded, calls
    ``race_class_records`` with both race collections and finally prints
    an empty line.

    The other analysis calls are intentionally left disabled as comments.
    The block used to end with a dangling ``'''`` that opened a string
    which was never closed; it has been removed so the function parses.
    """
    horses98 = HorseParserNoHandicaps('./../Data/born98.csv').horses
    horses05 = HorseParserNoHandicaps('./../Data/born05.csv').horses
    races98 = RaceParserNoHandicaps('./../Data/born98.csv').races
    races05 = RaceParserNoHandicaps('./../Data/born05.csv').races

    print('HorsesBorn98 Training Set:')
    print('No. of horses: ' + str(len(horses98)))
    print('No. of races: ' + str(len(races98)))
    print('HorsesBorn05 Training Set:')
    print('No. of horses: ' + str(len(horses05)))
    print('No. of races: ' + str(len(races05)))

    # Disabled two-dataset analyses:
    #average_race_speeds(races98, races05)
    #average_horse_speeds(horses98, horses05)
    #horses_age_record_no(horses98, horses05)
    #race_distances(races98, races05)
    #race_records_per_horse(horses98, horses05)
    race_class_records(races98, races05)

    # Disabled per-feature analyses (previously parked inside a bare
    # triple-quoted string, which is a no-op expression statement):
    #rating_vs_speed(horses98)
    #prize_money_vs_speed(horses98)
    #odds_vs_speed(horses98)
    #age_vs_speed(horses98)
    #no_of_runners_vs_speed(horses98)
    #race_class_vs_speed(horses98)
    #weight_vs_speed(horses98)
    #jockeys_claim_vs_speed(horses98)
    #place_vs_speed(horses98)
    #distance_vs_speed(horses98)
    #comptime_vs_speed(horses98)
    #stall_vs_speed(horses98)
    #goings_vs_speed(horses98)

    print('')
    #average_race_speeds(races05)
    #average_horse_speeds(horses05)
def _vectorise_for_linreg(horses):
    """Return ``(X, y)`` lists built by calling ``compute_vector`` per horse."""
    vectors = []
    targets = []
    for horse in horses:
        vector, target = compute_vector(horse)
        vectors.append(vector)
        targets.append(target)
    return vectors, targets


def _report_linear_regression(horses):
    """Fit a plain linear regression on one dataset and print its metrics.

    Splits ``horses`` with ``split_dataset``, fits
    ``linear_model.LinearRegression`` on the first part and prints the
    instance counts, the coefficients and five evaluation figures
    computed on the second part.

    Args:
        horses: horse collection as exposed by a parser's ``horses``
            attribute.
    """
    horses_train, horses_test = split_dataset(horses)

    X_train, y_train = _vectorise_for_linreg(horses_train)
    print('No. of instances in training set:')
    print(len(X_train))
    print(len(y_train))
    print('')

    X_test, y_test = _vectorise_for_linreg(horses_test)
    print('No. of instances in testing set:')
    print(len(X_test))
    print(len(y_test))
    print('')

    # Create the linear regression object and train it on the training set.
    regr = linear_model.LinearRegression()
    regr.fit(X_train, y_train)

    print('Coefficients:')
    print(regr.coef_)
    print('')

    # Printed under the label "Variance score"; the value is whatever the
    # estimator's score() reports on the testing set.
    print('Variance score:')
    print(regr.score(X_test, y_test))
    print('')

    # Predict once and reuse the result for every metric below; the
    # original re-ran predict() on the same input for each of them.
    y_pred = regr.predict(X_test)

    print('Mean absolute error:')
    print(mean_absolute_error(y_test, y_pred))
    print('')

    print('Explained variance:')
    print(explained_variance_score(y_test, y_pred))
    print('')

    print('Mean squared error:')
    print(mean_squared_error(y_test, y_pred))
    print('')

    print('R2 score:')
    print(r2_score(y_test, y_pred))
    print('')


def main():
    """Train and evaluate a linear regression on born98, then on born05.

    Both CSV files are parsed up front with the no-handicap parsers; the
    born98 horses are processed first, followed by the born05 horses.
    """
    horses98 = HorseParserNoHandicaps('./../Data/born98.csv').horses
    horses05 = HorseParserNoHandicaps('./../Data/born05.csv').horses
    # The race collections are parsed but not read by this variant.
    races98 = RaceParserNoHandicaps('./../Data/born98.csv').races
    races05 = RaceParserNoHandicaps('./../Data/born05.csv').races

    # HorsesBorn98 Dataset
    _report_linear_regression(horses98)

    # HorsesBorn05 Dataset
    _report_linear_regression(horses05)
def _share_of_races(count, total):
    """Return ``count / total`` as a float, or NaN when ``total`` is zero."""
    if not total:
        # An empty race collection would otherwise abort the whole report
        # with ZeroDivisionError.
        return float('nan')
    return float(count) / total


def _collect_no_handicap_stats(horses, races):
    """Gather the derived figures reported for one no-handicap dataset.

    Args:
        horses: ``horses`` collection of a ``HorseParserNoHandicaps``.
        races: ``races`` collection of a ``RaceParserNoHandicaps``.

    Returns:
        dict mapping a short name to the value returned by the matching
        statistics function.
    """
    full_races = get_full_races(races)
    all_horses = no_of_races_with_all_horses(races)
    winners = races_with_winning_horse(races)
    races_per_horse = average_no_of_races_per_horse(horses)
    ages = get_ages(full_races)
    races_per_age = races_at_each_age(full_races, ages)
    k_missing = races_with_k_missing_runners(races)
    return {
        'all_horses': all_horses,
        'winners': winners,
        'races_per_horse': races_per_horse,
        'races_per_age': races_per_age,
        'k_missing': k_missing,
    }


def _print_no_handicap_report(filename, horses, races, stats):
    """Print the statistics report for one dataset.

    Args:
        filename: CSV file name used in the report heading.
        horses: horse collection of the dataset.
        races: race collection of the dataset.
        stats: dict produced by ``_collect_no_handicap_stats``.
    """
    race_count = len(races)

    print(filename + ' file statistics - without handicap races:')
    print('No. of horses: ' + str(len(horses)))
    print('No. of races: ' + str(race_count))
    print('No. of races for which we have all the horses: '
          + str(stats['all_horses']))
    print('No. of races for which we have the winner: '
          + str(stats['winners']))
    print('Fraction of races for which we have all the horses: '
          + str(_share_of_races(stats['all_horses'], race_count)))
    print('Fraction of races for which we have the winner: '
          + str(_share_of_races(stats['winners'], race_count)))
    print('Average no. of races per horse: ' + str(stats['races_per_horse']))
    print('No. of races for horses at each age: '
          + str(stats['races_per_age']))
    print('No. of races with k-missing horse records: '
          + str(stats['k_missing']))
    print('No. of different trainers: ' + str(no_of_trainers(horses)))
    print('No. of different jockeys: ' + str(no_of_jockeys(horses)))

    # The four calls below are made for their own effect right after the
    # heading; their return values are not used.
    print('Going and Speed: ')
    going_average_speeds(races)
    print('Race Class and Speed:')
    class_average_speeds(races)
    print('Race Class and Distance:')
    class_average_distances(races)
    print('Race Class and Prize:')
    class_average_prizes(races)


def main():
    """Print summary statistics for born98.csv and born05.csv.

    Handicap races are excluded by using the ``...NoHandicaps`` parsers.
    All derived figures are gathered for both datasets before anything is
    printed; the two reports are separated by one empty line.
    """
    horse_parser_98 = HorseParserNoHandicaps('./../Data/born98.csv')
    horse_parser_05 = HorseParserNoHandicaps('./../Data/born05.csv')
    race_parser_98 = RaceParserNoHandicaps('./../Data/born98.csv')
    race_parser_05 = RaceParserNoHandicaps('./../Data/born05.csv')

    horses98 = horse_parser_98.horses
    horses05 = horse_parser_05.horses
    races98 = race_parser_98.races
    races05 = race_parser_05.races

    stats98 = _collect_no_handicap_stats(horses98, races98)
    stats05 = _collect_no_handicap_stats(horses05, races05)

    _print_no_handicap_report('born98.csv', horses98, races98, stats98)
    print('')
    _print_no_handicap_report('born05.csv', horses05, races05, stats05)
def _race_fraction(count, total):
    """Return ``count / total`` as a float, or NaN when ``total`` is zero."""
    if not total:
        # Avoid ZeroDivisionError when a dataset contains no races.
        return float('nan')
    return float(count) / total


def _collect_dataset_stats(horses, races):
    """Gather the derived figures reported for one full dataset.

    Args:
        horses: ``horses`` collection of a ``HorseParser``.
        races: ``races`` collection of a ``RaceParser``.

    Returns:
        dict mapping a short name to the value returned by the matching
        statistics function.
    """
    full_races = get_full_races(races)
    all_horses = no_of_races_with_all_horses(races)
    winners = no_of_races_with_winner(races, horses)
    races_per_horse = average_no_of_races_per_horse(horses)
    ages = get_ages(full_races)
    races_per_age = races_at_each_age(full_races, ages)
    k_missing = races_with_k_missing_runners(races)
    return {
        'all_horses': all_horses,
        'winners': winners,
        'races_per_horse': races_per_horse,
        'races_per_age': races_per_age,
        'k_missing': k_missing,
    }


def _print_dataset_report(filename, horse_parser, race_parser, stats,
                          no_handicap_races):
    """Print the statistics report for one dataset.

    Args:
        filename: CSV file name used in the report heading.
        horse_parser: parser whose ``horses``, ``comptime_missing`` and
            ``irish_races`` attributes are reported.
        race_parser: parser whose ``races``, ``comptime_missing`` and
            ``irish_races`` attributes are reported.
        stats: dict produced by ``_collect_dataset_stats``.
        no_handicap_races: ``races`` collection of the matching
            ``RaceParserNoHandicaps``; only its length is printed.
    """
    race_count = len(race_parser.races)

    print(filename + ' file statistics:')
    print('No. of horses: ' + str(len(horse_parser.horses)))
    print('No. of races: ' + str(race_count))
    print('No. of races for which we have all the horses: '
          + str(stats['all_horses']))
    print('No. of races for which we have the winner: '
          + str(stats['winners']))
    print('Fraction of races for which we have all the horses: '
          + str(_race_fraction(stats['all_horses'], race_count)))
    print('Fraction of races for which we have the winner: '
          + str(_race_fraction(stats['winners'], race_count)))
    print('Average no. of races per horse: ' + str(stats['races_per_horse']))
    print('No. of races for horses at each age: '
          + str(stats['races_per_age']))
    print('No. of races with k-missing horse records: '
          + str(stats['k_missing']))
    print('No. of horse records with comptime missing: '
          + str(horse_parser.comptime_missing))
    print('No. of race records with comptime missing: '
          + str(race_parser.comptime_missing))
    print('No. of horse records with Irish race class: '
          + str(horse_parser.irish_races))
    print('No. of race records with Irish race class '
          + str(race_parser.irish_races))
    # This line used to append a bare str(), i.e. an empty value.
    print('No. of races without handicap races:'
          + str(len(no_handicap_races)))


def main():
    """Print summary statistics for born98.csv and born05.csv.

    Uses the full ``HorseParser``/``RaceParser`` results for the main
    figures and the ``RaceParserNoHandicaps`` results for the count of
    races that remain once handicap races are excluded. That count was
    previously printed as an empty value for born98 and was missing for
    born05; it is now printed for both datasets. The two reports are
    separated by one empty line.
    """
    horse_parser_98 = HorseParser('./../Data/born98.csv')
    horse_parser_05 = HorseParser('./../Data/born05.csv')
    race_parser_98 = RaceParser('./../Data/born98.csv')
    race_parser_05 = RaceParser('./../Data/born05.csv')

    # NOTE(review): the two no-handicap *horse* parsers are constructed
    # but never read; kept because their construction may have effects
    # that cannot be judged from here.
    horse_parser_no_handicaps_98 = HorseParserNoHandicaps('./../Data/born98.csv')
    horse_parser_no_handicaps_05 = HorseParserNoHandicaps('./../Data/born05.csv')
    race_parser_no_handicaps_98 = RaceParserNoHandicaps('./../Data/born98.csv')
    race_parser_no_handicaps_05 = RaceParserNoHandicaps('./../Data/born05.csv')

    stats98 = _collect_dataset_stats(horse_parser_98.horses,
                                     race_parser_98.races)
    stats05 = _collect_dataset_stats(horse_parser_05.horses,
                                     race_parser_05.races)

    _print_dataset_report('born98.csv', horse_parser_98, race_parser_98,
                          stats98, race_parser_no_handicaps_98.races)
    print('')
    _print_dataset_report('born05.csv', horse_parser_05, race_parser_05,
                          stats05, race_parser_no_handicaps_05.races)
def _vectorise_for_cv(horses):
    """Return ``(X, y)`` lists built by calling ``compute_vector`` per horse."""
    vectors = []
    targets = []
    for horse in horses:
        vector, target = compute_vector(horse)
        vectors.append(vector)
        targets.append(target)
    return vectors, targets


def _report_cv_linear_regression(title, horses):
    """Cross-validate, fit and evaluate a linear regression on one dataset.

    Prints ``title``, the instance counts of both parts returned by
    ``split_dataset``, the 5-fold cross-validation scores on the first
    part, the fitted coefficients and intercept, and the mean
    squared/absolute error on the second part.

    Args:
        title: heading printed before anything else.
        horses: horse collection as exposed by a parser's ``horses``
            attribute.
    """
    print(title)
    horses_train, horses_test = split_dataset(horses)

    X_train, y_train = _vectorise_for_cv(horses_train)
    print('No. of instances in training set:')
    print(len(X_train))
    print(len(y_train))
    print('')

    X_test, y_test = _vectorise_for_cv(horses_test)
    print('No. of instances in testing set:')
    print(len(X_test))
    print(len(y_test))
    print('')

    # Convert the training data once; it is needed both for the
    # cross-validation and for the final fit.
    X_train_arr = np.array(X_train)
    y_train_arr = np.array(y_train)

    # Create linear regression object
    regr = linear_model.LinearRegression(fit_intercept=True)

    # Cross-validation
    cv_scores = cross_validation.cross_val_score(
        regr, X_train_arr, y_train_arr,
        scoring='mean_squared_error', cv=5)

    print('5-fold CV scores using MSE:')
    print(cv_scores)
    print('')

    # NOTE(review): the value printed as "SD" is twice the standard
    # deviation of the scores -- confirm whether that is intended.
    print('Mean of scores: ' + str(cv_scores.mean()))
    print('SD of scores: ' + str(cv_scores.std() * 2))
    print('')

    # Train the model using the training set
    regr.fit(X_train_arr, y_train_arr)

    print('Coefficients:')
    print(regr.coef_)
    print('')

    print('Intercept: ')
    print(regr.intercept_)
    print('')

    # Predict using the testing set
    y_pred = regr.predict(X_test)

    print('Mean squared error:')
    print(mean_squared_error(y_test, y_pred))
    print('')

    print('Mean absolute error:')
    print(mean_absolute_error(y_test, y_pred))
    print('')


def main():
    """Run the cross-validated linear regression on born98, then born05.

    Both CSV files are parsed up front with the no-handicap parsers; each
    horse collection is then processed by the same reporting routine.
    """
    horses98 = HorseParserNoHandicaps('./../Data/born98.csv').horses
    horses05 = HorseParserNoHandicaps('./../Data/born05.csv').horses
    # The race collections are parsed but not read by this variant.
    races98 = RaceParserNoHandicaps('./../Data/born98.csv').races
    races05 = RaceParserNoHandicaps('./../Data/born05.csv').races

    _report_cv_linear_regression(''' HorsesBorn98 Dataset ''', horses98)
    _report_cv_linear_regression(''' HorsesBorn05 Dataset ''', horses05)
def main():
    """Train a linear-kernel SVR on the born98 horses and print its metrics.

    Parses both CSV files with the no-handicap parsers, splits the born98
    horses with ``split_dataset``, fits ``SVR(kernel='linear', C=1e3)`` on
    the first part and prints the estimator's score plus four error
    metrics computed on the second part. Only the born98 horses are
    modelled in this variant.
    """
    csv_98 = './../Data/born98.csv'
    csv_05 = './../Data/born05.csv'

    horses98 = HorseParserNoHandicaps(csv_98).horses
    # Parsed as in the other variants, but not read below.
    horses05 = HorseParserNoHandicaps(csv_05).horses
    races98 = RaceParserNoHandicaps(csv_98).races
    races05 = RaceParserNoHandicaps(csv_05).races

    print('HorsesBorn98 Dataset')
    train_part, test_part = split_dataset(horses98)

    # compute_vector yields a (feature vector, target) pair per horse.
    train_pairs = [compute_vector(horse) for horse in train_part]
    X_train = [vector for vector, _ in train_pairs]
    y_train = [target for _, target in train_pairs]

    print('No. of instances in training set:')
    print(len(X_train))
    print(len(y_train))
    print('')

    test_pairs = [compute_vector(horse) for horse in test_part]
    X_test = [vector for vector, _ in test_pairs]
    y_test = [target for _, target in test_pairs]

    print('No. of instances in testing set:')
    print(len(X_test))
    print(len(y_test))
    print('')

    print('Create SVR object')
    model = SVR(kernel='linear', C=1e3)  # gamma left at its default

    print('Training SVR')
    model.fit(X_train, y_train)

    print('Predicting')
    y_pred = model.predict(X_test)

    # Printed under the label "Variance score"; the value is whatever the
    # estimator's score() reports on the testing set.
    print('Variance score:')
    print(model.score(X_test, y_test))
    print('')

    # Remaining metrics all compare the same targets and predictions.
    metric_table = (
        ('Mean absolute error:', mean_absolute_error),
        ('Explained variance:', explained_variance_score),
        ('Mean squared error:', mean_squared_error),
        ('R2 score:', r2_score),
    )
    for heading, metric in metric_table:
        print(heading)
        print(metric(y_test, y_pred))
        print('')