def main():
    """Run ``going_class`` on the training split of both horse datasets.

    The born-98 and born-05 CSV files are each parsed for horses and for
    races, every horse list is split with ``split_dataset`` and
    ``going_class`` is invoked on the born-05 training split first and on
    the born-98 training split second.
    """
    born98_csv = './../Data/born98.csv'
    born05_csv = './../Data/born05.csv'

    born98_horses = HorseParserNoHandicaps(born98_csv).horses
    born05_horses = HorseParserNoHandicaps(born05_csv).horses

    # The race tables are still parsed, although nothing below reads them.
    born98_races = RaceParserNoHandicaps(born98_csv).races
    born05_races = RaceParserNoHandicaps(born05_csv).races

    # Only the training portions are analysed here.
    train_98, _test_98 = split_dataset(born98_horses)
    train_05, _test_05 = split_dataset(born05_horses)

    for training_split in (train_05, train_98):
        going_class(training_split)
def main():
    """Run every feature analysis on the born-98 and born-05 training splits.

    For each dataset a header and the number of training horses are
    printed, followed by the same ordered series of ``*_vs_speed`` /
    ``rating_vs_odds`` analyses. A blank line separates the two reports.
    """
    born98_horses = HorseParserNoHandicaps('./../Data/born98.csv').horses
    born05_horses = HorseParserNoHandicaps('./../Data/born05.csv').horses

    train_98, _test_98 = split_dataset(born98_horses)
    train_05, _test_05 = split_dataset(born05_horses)

    # Order matters: the analyses run in exactly this sequence.
    # goings_vs_speed is intentionally left out (it was disabled).
    analyses = (
        rating_vs_speed,
        prize_money_vs_speed,
        odds_vs_speed,
        age_vs_speed,
        no_of_runners_vs_speed,
        race_class_vs_speed,
        weight_vs_speed,
        jockeys_claim_vs_speed,
        place_vs_speed,
        distance_vs_speed,
        comptime_vs_speed,
        stall_vs_speed,
        rating_vs_odds,
    )

    print('HorsesBorn98 Training Set:')
    print('No. of horses: ' + str(len(train_98)))
    for analyse in analyses:
        analyse(train_98)

    print('')

    print('HorsesBorn05 Training Set:')
    print('No. of horses: ' + str(len(train_05)))
    for analyse in analyses:
        analyse(train_05)
# main(): parses the born-98 and born-05 horse CSV files, splits each horse
# list with split_dataset, prints a header plus the number of training horses
# for each dataset and runs jockeys_claim_test on each training split. The
# per-feature *_vs_speed analyses are disabled by wrapping them in
# triple-quoted strings.
# NOTE(review): this definition ends with an opening ''' that is never closed
# in the visible text, so the tail of the function is missing here -- confirm
# against the complete file before changing the code.
def main(): #horse_parser_98 = HorseParserNoHandicaps('./../Data/born98.csv') #horse_parser_05 = HorseParserNoHandicaps('./../Data/born05.csv') horses98 = HorseParserNoHandicaps('./../Data/born98.csv').horses horses05 = HorseParserNoHandicaps('./../Data/born05.csv').horses horses_train_98, horses_test_98 = split_dataset(horses98) horses_train_05, horses_test_05 = split_dataset(horses05) print 'HorsesBorn98 Training Set:' print 'No. of horses: ' + str(len(horses_train_98)) jockeys_claim_test(horses_train_98) ''' rating_vs_speed(horses_train_98) prize_money_vs_speed(horses_train_98) odds_vs_speed(horses_train_98) age_vs_speed(horses_train_98) no_of_runners_vs_speed(horses_train_98) race_class_vs_speed(horses_train_98) weight_vs_speed(horses_train_98) jockeys_claim_vs_speed(horses_train_98) place_vs_speed(horses_train_98) distance_vs_speed(horses_train_98) comptime_vs_speed(horses_train_98) stall_vs_speed(horses_train_98) #goings_vs_speed(horses_train_98) ''' print '' print 'HorsesBorn05 Training Set:' print 'No. of horses: ' + str(len(horses_train_05)) jockeys_claim_test(horses_train_05) '''
def main():
    """Train a model on the housing data, predict and print the RMSE ratio.

    The housing CSV is loaded with a fixed list of ignored columns, the
    features are preprocessed with the ``"normalize"`` mode and split into
    train and test parts. ``train`` writes the model to a file under
    ``output`` and ``predict`` reads it back for the test features.
    """
    dropped_columns = [
        'ZN', 'CHAS', 'NOX', 'RM', 'DIS', 'RAD', 'TAX', 'PIRATIO', 'B',
        'LSTAT',
    ]
    data_path = os.sep.join(('input', 'housing.csv'))
    model_file_path = os.sep.join(("output", "linear_regression_model_mv.sav"))

    X, Y = load_data(data_path, False, dropped_columns)
    X = preprocess(X, "normalize")
    X_train, y_train, X_test, y_test = split_dataset(X, Y)

    train(X_train, y_train, model_file_path)
    predictions = predict(X_test, model_file_path)

    print("rmse ratio:", calculate_rmse_ration(y_test, predictions))
def main(input_path, output_path, ignored_columns, preprocess_type,
         training_data_rate, step_length, threshold_rate, max_loop_num,
         dynamic_step):
    """Echo the run configuration, then train, predict and report timing.

    Args:
        input_path: passed to ``load_data`` as the data source.
        output_path: passed to ``train`` and later to ``predict``.
        ignored_columns: forwarded to ``load_data``; only echoed when it is
            not ``None``.
        preprocess_type: mode handed to ``preprocess``.
        training_data_rate: forwarded to ``split_dataset``.
        step_length: forwarded to ``train``.
        threshold_rate: used with ``Y`` by ``gen_threshold``.
        max_loop_num: forwarded to ``train``.
        dynamic_step: forwarded to ``train``; also selects which stepping
            message is printed.

    Returns:
        None.
    """
    # --- configuration echo ---------------------------------------------
    print("input:", input_path)
    print("output:", output_path)
    print("\n")

    if ignored_columns is not None:
        print("ignored_columns:", ignored_columns)
        # NOTE(review): the original indentation was lost; this separator
        # is assumed to belong to the ignored_columns group.
        print("\n")

    print("preprocess_type:", preprocess_type)
    print("training_data_rate:", training_data_rate)
    print("\n")

    print("threshold_rate:", threshold_rate)
    print("max_loop_num:", max_loop_num)
    print("step_length:", step_length)
    print("dynamic stepping ..." if dynamic_step else "static stepping ...")
    print("\n")

    # --- timed pipeline ---------------------------------------------------
    started_at = datetime.now()

    X, Y = load_data(input_path, True, ignored_columns)
    X = preprocess(X, preprocess_type)
    X_train, y_train, X_test, y_test = split_dataset(X, Y, training_data_rate)

    stop_threshold = gen_threshold(Y, threshold_rate)
    train(X_train, y_train, output_path, step_length, stop_threshold,
          max_loop_num, dynamic_step)

    predicted = predict(output_path, X_test)
    ratio = calculate_rmse_ration(y_test, predicted)
    print("rmse ratio (rmse / y_mean) is:", ratio, "\n")

    elapsed = datetime.now() - started_at
    print("execution duration:", elapsed, "\n")
    return
def main():
    """Run ``lsm`` on the housing data and print the RMSE ratio.

    The housing CSV is loaded with a fixed list of ignored columns,
    preprocessed with the ``"normalize"`` mode and split. ``lsm`` is given
    the training part together with a CSV path under ``output``, and
    ``predict`` is then called with that same path and the test features.
    """
    dropped_columns = [
        'ZN', 'CHAS', 'NOX', 'RM', 'DIS', 'RAD', 'TAX', 'PIRATIO', 'B',
        'LSTAT',
    ]
    source_csv = os.sep.join(('input', 'housing.csv'))

    X, Y = load_data(source_csv, True, dropped_columns)
    X = preprocess(X, "normalize")
    X_train, y_train, X_test, y_test = split_dataset(X, Y)

    result_path = os.sep.join(('output', 'lsm_multivariant.csv'))
    lsm(X_train, y_train, result_path)

    predictions = predict(result_path, X_test)
    print("rmse ratio:", calculate_rmse_ration(y_test, predictions))
    return
def run_experiments_without_cross_validation(model_names, features_to_use):
    """Train and evaluate each requested model on one train/test split.

    The 'pasokh' features are loaded and split by
    ``utilities.split_dataset``. Every recognised entry of ``model_names``
    is fitted, scored with ``evaluate_model`` and then used to summarise
    the documents for a Rouge evaluation.

    Args:
        model_names: iterable of model keys; recognised values are
            'dtr', 'linear', 'svm', 'dummy', 'ideal' and 'nb'. Unknown keys
            are reported and skipped.
        features_to_use: feature selection forwarded to the utilities and
            to ``evaluate_summarizer``.

    Returns:
        ``(rouge_scores, model_results)`` of the last model that was
        evaluated, or ``(None, None)`` when no entry of ``model_names`` was
        recognised (previously this case raised ``UnboundLocalError``).
    """
    dataset_features = utilities.load_features('pasokh')
    features, targets, labels, documents, all_vec = utilities.split_dataset(
        dataset_features, features_to_use, 0.40)

    X_normal = np.array(all_vec)
    # The return value is not used here.
    # NOTE(review): presumably this call records normalisation statistics
    # as a side effect ('learn' mode) -- confirm in utilities.
    utilities.normalize_dataset(X_normal, features_to_use, 'learn')

    X_train = np.array(features['train'])
    X_test = np.array(features['test'])
    y_train = np.array(targets['train'])
    y_test = np.array(targets['test'])
    labels_train = np.array(labels['train'])
    labels_test = np.array(labels['test'])

    print("Dataset size: {}".format(len(X_normal)))

    # Balancing is disabled in this variant: the training split is used
    # as it is.
    X_balanced, y_balanced, labels_balanced = X_train, y_train, labels_train

    print("Train set size: {}".format(len(X_balanced)))
    print("Number of True/False labels: {}/{}".format(
        sum(labels_balanced), sum(1 for i in labels_balanced if not i)))
    print("Test set size: {}".format(len(X_test)))
    print("Number of True/False labels: {}/{}".format(
        sum(labels_test), sum(1 for i in labels_test if not i)))
    print("Used features: {}".format(len(X_balanced[0])))

    dataset_json = json.loads(utilities.read_file('resources/pasokh/all.json'))

    # Defined up front so the final return is valid even when every model
    # name is unknown.
    rouge_scores = None
    model_results = None

    for model_type in model_names:
        print('**********************' + model_type + '**********************')

        # Reset for every model: the flag used to be set once before the
        # loop, so a classifier ('nb') made all later models be evaluated
        # as classifiers too.
        is_regressor = True

        if model_type == 'dtr':
            regr = tree.DecisionTreeRegressor()
            regr = regr.fit(X_balanced, y_balanced)
        elif model_type == 'linear':
            regr = linear_model.LinearRegression(normalize=True)
            regr.fit(X_balanced, y_balanced)
            print('Coefficients: \n', regr.coef_)
        elif model_type == 'svm':
            regr = SVR(verbose=True, epsilon=0.00001, gamma='auto', tol=.00001)
            regr.fit(X_balanced, y_balanced)
            print('Coefficients: \n', regr.get_params())
        elif model_type == 'dummy':
            regr = RndRegressor()
        elif model_type == 'ideal':
            from IdealRegressor import IdealRegressor
            regr = IdealRegressor(X_train, y_train)
            regr.fit(X_test, y_test)
        elif model_type == 'nb':
            from sklearn.naive_bayes import ComplementNB
            # Trained on the boolean labels rather than the numeric targets.
            regr = ComplementNB(alpha=1)
            regr.fit(X_train, labels_train)
            is_regressor = False
        else:
            print("Regression type is undefined:" + model_type)
            continue

        # Score the model on the test split.
        model_results = evaluate_model(regr, X_test, X_balanced, y_test,
                                       y_balanced, labels_test,
                                       labels_balanced, is_regressor)

        print('Summarizing dataset and evaluating Rouge...')
        rouge_scores = evaluate_summarizer(regr, dataset_json, features_to_use, True)
        utilities.print_rouges(rouge_scores)
        print('*****************************************************************************')

    return rouge_scores, model_results
def main():
    """Fit and report a linear regression for the born-98 and born-05 horses.

    For each dataset the horses are split with ``split_dataset``, turned
    into feature vectors and targets with ``compute_vector``, cross
    validated (5 folds), fitted and scored on the test split. The ell1 and
    ell2 metrics are finally computed on the training split.

    The born-05 ell2 value used to be computed from the born-98 model's
    predictions; each dataset is now always scored with its own model.
    """

    def _vectorise(horses):
        """Return (feature vectors, targets) for ``horses``."""
        vectors, speeds = [], []
        for horse in horses:
            vector, speed = compute_vector(horse)
            vectors.append(vector)
            speeds.append(speed)
        return vectors, speeds

    def _fit_and_report(title, horses):
        """Print the full regression report for one dataset."""
        print(title)

        train_horses, test_horses = split_dataset(horses)

        X_train, y_train = _vectorise(train_horses)
        print('No. of instances in training set:')
        print(len(X_train))
        print(len(y_train))
        print('')

        X_test, y_test = _vectorise(test_horses)
        print('No. of instances in testing set:')
        print(len(X_test))
        print(len(y_test))
        print('')

        regr = linear_model.LinearRegression(fit_intercept=True)

        # Cross-validation on the training split only.
        cv_scores = cross_validation.cross_val_score(
            regr, np.array(X_train), np.array(y_train),
            scoring='mean_squared_error', cv=5)

        print('5-fold CV scores using MSE:')
        print(cv_scores)
        print('')

        # NOTE(review): the value labelled "SD" is twice the standard
        # deviation -- left as it was, confirm the intent.
        print('Mean of scores: ' + str(cv_scores.mean()))
        print('SD of scores: ' + str(cv_scores.std() * 2))
        print('')

        regr.fit(np.array(X_train), np.array(y_train))

        print('Coefficients:')
        print(regr.coef_)
        print('')
        print('Intercept: ')
        print(regr.intercept_)
        print('')

        y_pred = regr.predict(X_test)

        print('Mean squared error:')
        print(mean_squared_error(y_test, y_pred))
        print('')
        print('Mean absolute error:')
        print(mean_absolute_error(y_test, y_pred))
        print('')

        # ell1 and ell2 compare the actual speeds with those predicted for
        # the training set, using this dataset's own model.
        print('ell1:')
        print(ell1(y_train, regr.predict(X_train)))
        print('')
        print('ell2:')
        print(ell2(y_train, regr.predict(X_train)))
        print('')

    horses98 = HorseParserNoHandicaps('./../Data/born98.csv').horses
    horses05 = HorseParserNoHandicaps('./../Data/born05.csv').horses
    # The race tables are parsed as before although nothing below reads
    # them.
    races98 = RaceParserNoHandicaps('./../Data/born98.csv').races
    races05 = RaceParserNoHandicaps('./../Data/born05.csv').races

    _fit_and_report(''' HorsesBorn98 Dataset ''', horses98)
    _fit_and_report(''' HorsesBorn05 Dataset ''', horses05)
def main():
    """Cross-validate, fit and score a linear regression per horse dataset.

    For the born-98 and the born-05 horses the data is split with
    ``split_dataset``, vectorised with ``compute_vector``, cross validated
    with the ``'r2'`` scorer (5 folds), fitted and scored on the test
    split (MSE, MAE, R2 and the project's ``r1`` metric).

    Two defects of the earlier version are fixed here:
    * the cross-validation heading claimed MSE although the scorer is R2;
    * the born-05 ``r1`` call passed ``(predicted, actual)`` while the
      born-98 call passed ``(actual, predicted)``; both now use
      ``(actual, predicted)``.
    """

    def _vectorise(horses):
        """Return (feature vectors, targets) for ``horses``."""
        vectors, targets = [], []
        for horse in horses:
            vector, target = compute_vector(horse)
            vectors.append(vector)
            targets.append(target)
        return vectors, targets

    def _fit_and_report(title, horses):
        """Print the full regression report for one dataset."""
        print(title)

        train_horses, test_horses = split_dataset(horses)

        X_train, y_train = _vectorise(train_horses)
        print('No. of instances in training set:')
        print(len(X_train))
        print(len(y_train))
        print('')

        X_test, y_test = _vectorise(test_horses)
        print('No. of instances in testing set:')
        print(len(X_test))
        print(len(y_test))
        print('')

        regr = linear_model.LinearRegression(fit_intercept=True)

        cv_scores = cross_validation.cross_val_score(
            regr, np.array(X_train), np.array(y_train), scoring='r2', cv=5)

        # The heading names the scorer that is really used above.
        print('5-fold CV scores using R2:')
        print(cv_scores)
        print('')

        # NOTE(review): the value labelled "SD" is twice the standard
        # deviation -- left as it was, confirm the intent.
        print('Mean of scores: ' + str(cv_scores.mean()))
        print('SD of scores: ' + str(cv_scores.std() * 2))
        print('')

        regr.fit(np.array(X_train), np.array(y_train))

        print('Coefficients:')
        print(regr.coef_)
        print('')
        print('Intercept: ')
        print(regr.intercept_)
        print('')

        y_pred = regr.predict(X_test)

        print('Mean squared error:')
        print(mean_squared_error(y_test, y_pred))
        print('')
        print('Mean absolute error:')
        print(mean_absolute_error(y_test, y_pred))
        print('')
        print('R2 Score:')
        print(r2_score(y_test, y_pred))
        print('')
        # Same (actual, predicted) order as every other metric above.
        print('1-R1 Score:')
        print(r1(y_test, y_pred))
        print('')

    horses98 = HorseParserNoHandicaps('./../Data/born98.csv').horses
    horses05 = HorseParserNoHandicaps('./../Data/born05.csv').horses

    _fit_and_report(''' HorsesBorn98 Dataset ''', horses98)
    _fit_and_report(''' HorsesBorn05 Dataset ''', horses05)
def learn_models(model_names, features_to_use):
    """Train, evaluate and export each requested model on the CNN dataset.

    This version splits original texts in dataset for evaluating summaries:
    only the documents of the test split are handed to
    ``evaluate_summarizer``.

    Args:
        model_names: iterable of model keys; recognised values are
            'dtr', 'linear', 'svm', 'dummy', 'ideal' and 'nb'. Unknown keys
            are reported and skipped.
        features_to_use: sequence of feature names; forwarded to the
            utilities and to ``evaluate_summarizer`` and joined with commas
            for the log output.

    Returns:
        ``(rouge_scores, model_results)`` of the last model that was
        evaluated, or ``(None, None)`` when no entry of ``model_names`` was
        recognised (previously this case raised ``UnboundLocalError``).
    """
    dataset_features = utilities.load_features('CNN')
    features, targets, labels, documents, all_vec = utilities.split_dataset(
        dataset_features, features_to_use, 0.28, 'CNN')

    X_normal = np.array(all_vec)
    # The return value is not used here.
    # NOTE(review): presumably this call records normalisation statistics
    # as a side effect ('learn' mode) -- confirm in utilities.
    utilities.normalize_dataset(X_normal, features_to_use, 'learn')

    X_train = np.array(features['train'])
    X_test = np.array(features['test'])
    y_train = np.array(targets['train'])
    y_test = np.array(targets['test'])
    labels_train = np.array(labels['train'])
    labels_test = np.array(labels['test'])

    print("Dataset size: {}".format(len(all_vec)))

    X_balanced, y_balanced, labels_balanced = utilities.balance_dataset(
        X_train, y_train, labels_train, 1)

    print("Used features: " + ','.join(features_to_use))
    print("Train set size: {}".format(len(X_balanced)))
    print("Number of True/False labels: {}/{}".format(
        sum(labels_balanced), sum(1 for i in labels_balanced if not i)))
    print("Test set size: {}".format(len(X_test)))
    print("Number of True/False labels: {}/{}".format(
        sum(labels_test), sum(1 for i in labels_test if not i)))
    print("Used features: {}".format(len(X_balanced[0])))

    dataset_json = json.loads(
        utilities.read_file('resources/CNN/documents.json'))
    # Keep only the test documents, re-keyed by integer id.
    test_documents = {int(key): dataset_json[key] for key in documents['test']}

    # Defined up front so the final return is valid even when every model
    # name is unknown.
    rouge_scores = None
    model_results = None

    for model_type in model_names:
        print('**********************' + model_type + '**********************')

        # Reset for every model: the flag used to be set once before the
        # loop, so 'dummy' or 'nb' made all later models be evaluated as
        # non-regressors too.
        is_regressor = True

        if model_type == 'dtr':
            regr = tree.DecisionTreeRegressor(criterion='friedman_mse')
            regr = regr.fit(X_balanced, y_balanced)
            print(regr.get_params())
            export_name = 'dtr'
        elif model_type == 'linear':
            regr = linear_model.LinearRegression()
            regr.fit(X_balanced, y_balanced)
            print('Coefficients: \n', regr.coef_)
            export_name = 'linear'
        elif model_type == 'svm':
            regr = SVR(kernel='rbf', degree=7, verbose=False,
                       epsilon=0.000001, gamma='scale', tol=.0000001,
                       shrinking=True)
            regr.fit(X_balanced, y_balanced)
            print('Coefficients: \n', regr.get_params())
            export_name = 'svm'
        elif model_type == 'dummy':
            regr = RndRegressor()
            export_name = 'dummy'
            is_regressor = False
        elif model_type == 'ideal':
            from IdealRegressor import IdealRegressor
            regr = IdealRegressor(X_train, y_train)
            regr.fit(X_test, y_test)
            export_name = 'ideal'
        elif model_type == 'nb':
            from sklearn.naive_bayes import ComplementNB
            # Trained on the boolean labels rather than the numeric targets.
            regr = ComplementNB(alpha=0.015)
            regr.fit(X_train, labels_train)
            is_regressor = False
            export_name = 'nb'
        else:
            print("Regression type is undefined:" + model_type)
            continue

        # Score the model on the test split.
        model_results = Learn.evaluate_model(regr, X_test, X_balanced, y_test,
                                             y_balanced, labels_test,
                                             labels_balanced, is_regressor)

        print('Summarizing dataset and evaluating Rouge...')
        rouge_scores = evaluate_summarizer(regr, test_documents,
                                           features_to_use, True)
        utilities.print_rouges(rouge_scores)
        utilities.export_model(regr, export_name)
        print(
            '*****************************************************************************'
        )

    return rouge_scores, model_results
def main():
    """Fit and score a linear regression for the born-98 and born-05 horses.

    For each dataset the horses are split with ``split_dataset``,
    vectorised with ``compute_vector``, fitted and scored on the test split
    (MSE, MAE, R2 and the project's ``r1`` metric).

    The born-05 ``r1`` call used to pass ``(predicted, actual)`` while the
    born-98 call passed ``(actual, predicted)``; both datasets now use the
    same ``(actual, predicted)`` order.
    """

    def _vectorise(horses):
        """Return (feature vectors, targets) for ``horses``."""
        vectors, targets = [], []
        for horse in horses:
            vector, target = compute_vector(horse)
            vectors.append(vector)
            targets.append(target)
        return vectors, targets

    def _fit_and_report(title, horses):
        """Print the full regression report for one dataset."""
        print(title)

        train_horses, test_horses = split_dataset(horses)

        X_train, y_train = _vectorise(train_horses)
        print('No. of instances in training set:')
        print(len(X_train))
        print(len(y_train))
        print('')

        X_test, y_test = _vectorise(test_horses)
        print('No. of instances in testing set:')
        print(len(X_test))
        print(len(y_test))
        print('')

        regr = linear_model.LinearRegression(fit_intercept=True)
        regr.fit(np.array(X_train), np.array(y_train))

        print('Coefficients:')
        print(regr.coef_)
        print('')
        print('Intercept: ')
        print(regr.intercept_)
        print('')

        y_pred = regr.predict(X_test)

        print('Mean squared error:')
        print(mean_squared_error(y_test, y_pred))
        print('')
        print('Mean absolute error:')
        print(mean_absolute_error(y_test, y_pred))
        print('')
        print('R2 Score:')
        print(r2_score(y_test, y_pred))
        print('')
        # Same (actual, predicted) order as every other metric above.
        print('1-R1 Score:')
        print(r1(y_test, y_pred))
        print('')

    horses98 = HorseParserNoHandicaps('./../Data/born98.csv').horses
    horses05 = HorseParserNoHandicaps('./../Data/born05.csv').horses

    _fit_and_report(''' HorsesBorn98 Dataset ''', horses98)
    _fit_and_report(''' HorsesBorn05 Dataset ''', horses05)
def main():
    """Fit a linear-kernel SVR on the born-98 horses and print test metrics.

    Both CSV files are parsed for horses and races, but only the born-98
    horses are modelled. The horses are split with ``split_dataset`` and
    vectorised with ``compute_vector``; the fitted model is scored on the
    test split.
    """
    born98_csv = './../Data/born98.csv'
    born05_csv = './../Data/born05.csv'

    horses98 = HorseParserNoHandicaps(born98_csv).horses
    # The born-05 horses and both race tables are parsed but not used below.
    horses05 = HorseParserNoHandicaps(born05_csv).horses
    races98 = RaceParserNoHandicaps(born98_csv).races
    races05 = RaceParserNoHandicaps(born05_csv).races

    print('HorsesBorn98 Dataset')
    train_horses, test_horses = split_dataset(horses98)

    train_pairs = [compute_vector(horse) for horse in train_horses]
    X_train = [vector for vector, _ in train_pairs]
    y_train = [target for _, target in train_pairs]
    print('No. of instances in training set:')
    print(len(X_train))
    print(len(y_train))
    print('')

    test_pairs = [compute_vector(horse) for horse in test_horses]
    X_test = [vector for vector, _ in test_pairs]
    y_test = [target for _, target in test_pairs]
    print('No. of instances in testing set:')
    print(len(X_test))
    print(len(y_test))
    print('')

    print('Create SVR object')
    model = SVR(kernel='linear', C=1e3)

    print('Training SVR')
    model.fit(X_train, y_train)

    print('Predicting')
    y_pred = model.predict(X_test)

    # The model's own score() on the test split.
    print('Variance score:')
    print(model.score(X_test, y_test))
    print('')

    # Remaining metrics all compare the test targets with y_pred.
    reports = (
        ('Mean absolute error:', mean_absolute_error),
        ('Explained variance:', explained_variance_score),
        ('Mean squared error:', mean_squared_error),
        ('R2 score:', r2_score),
    )
    for heading, metric in reports:
        print(heading)
        print(metric(y_test, y_pred))
        print('')
def main():
    """Fit a linear regression per horse dataset and print test metrics.

    Both CSV files are parsed for horses and races. The born-98 horses are
    processed first and the born-05 horses second: each list is split with
    ``split_dataset``, vectorised with ``compute_vector``, fitted and then
    scored on its test split.
    """
    born98_csv = './../Data/born98.csv'
    born05_csv = './../Data/born05.csv'

    horses98 = HorseParserNoHandicaps(born98_csv).horses
    horses05 = HorseParserNoHandicaps(born05_csv).horses
    # The race tables are parsed but not used below.
    races98 = RaceParserNoHandicaps(born98_csv).races
    races05 = RaceParserNoHandicaps(born05_csv).races

    # Each metric is evaluated on a fresh predict() call for the test set.
    reports = (
        ('Mean absolute error:', mean_absolute_error),
        ('Explained variance:', explained_variance_score),
        ('Mean squared error:', mean_squared_error),
        ('R2 score:', r2_score),
    )

    for horses in (horses98, horses05):
        train_horses, test_horses = split_dataset(horses)

        X_train, y_train = [], []
        for horse in train_horses:
            vector, target = compute_vector(horse)
            X_train.append(vector)
            y_train.append(target)
        print('No. of instances in training set:')
        print(len(X_train))
        print(len(y_train))
        print('')

        X_test, y_test = [], []
        for horse in test_horses:
            vector, target = compute_vector(horse)
            X_test.append(vector)
            y_test.append(target)
        print('No. of instances in testing set:')
        print(len(X_test))
        print(len(y_test))
        print('')

        model = linear_model.LinearRegression()
        model.fit(X_train, y_train)

        print('Coefficients:')
        print(model.coef_)
        print('')

        # The model's own score() on the test split.
        print('Variance score:')
        print(model.score(X_test, y_test))
        print('')

        for heading, metric in reports:
            print(heading)
            print(metric(y_test, (model.predict(X_test))))
            print('')
def main():
    """Fit and score a linear regression for the born-98 and born-05 horses.

    Each dataset is split with ``split_dataset``, vectorised with
    ``compute_vector``, fitted and scored on its test split (MSE, MAE, R2
    and the project's ``r1`` metric).

    The born-05 ``r1`` call used to pass ``(predicted, actual)`` while the
    born-98 call passed ``(actual, predicted)``; running both datasets
    through one loop makes the ``(actual, predicted)`` order uniform.
    """
    horses98 = HorseParserNoHandicaps('./../Data/born98.csv').horses
    horses05 = HorseParserNoHandicaps('./../Data/born05.csv').horses

    datasets = (
        (''' HorsesBorn98 Dataset ''', horses98),
        (''' HorsesBorn05 Dataset ''', horses05),
    )

    for title, horses in datasets:
        print(title)

        train_horses, test_horses = split_dataset(horses)

        X_train, y_train = [], []
        for horse in train_horses:
            vector, target = compute_vector(horse)
            X_train.append(vector)
            y_train.append(target)
        print('No. of instances in training set:')
        print(len(X_train))
        print(len(y_train))
        print('')

        X_test, y_test = [], []
        for horse in test_horses:
            vector, target = compute_vector(horse)
            X_test.append(vector)
            y_test.append(target)
        print('No. of instances in testing set:')
        print(len(X_test))
        print(len(y_test))
        print('')

        regr = linear_model.LinearRegression(fit_intercept=True)
        regr.fit(np.array(X_train), np.array(y_train))

        print('Coefficients:')
        print(regr.coef_)
        print('')
        print('Intercept: ')
        print(regr.intercept_)
        print('')

        y_pred = regr.predict(X_test)

        # Every metric receives (actual, predicted) in that order.
        print('Mean squared error:')
        print(mean_squared_error(y_test, y_pred))
        print('')
        print('Mean absolute error:')
        print(mean_absolute_error(y_test, y_pred))
        print('')
        print('R2 Score:')
        print(r2_score(y_test, y_pred))
        print('')
        print('1-R1 Score:')
        print(r1(y_test, y_pred))
        print('')
def main():
    """Train a linear-kernel SVR on the born-98 horses and report test scores.

    Horses and races are parsed from both CSV files, but only the born-98
    horses feed the model. They are split with ``split_dataset`` and
    converted with ``compute_vector`` before fitting; all scores are
    computed on the test split.
    """

    def _emit(heading, value):
        """Print a heading, the value and a blank separator line."""
        print(heading)
        print(value)
        print('')

    horses98 = HorseParserNoHandicaps('./../Data/born98.csv').horses
    # Parsed but not used further down.
    horses05 = HorseParserNoHandicaps('./../Data/born05.csv').horses
    races98 = RaceParserNoHandicaps('./../Data/born98.csv').races
    races05 = RaceParserNoHandicaps('./../Data/born05.csv').races

    print('HorsesBorn98 Dataset')
    training_horses, testing_horses = split_dataset(horses98)

    train_X, train_y = [], []
    for horse in training_horses:
        features, target = compute_vector(horse)
        train_X.append(features)
        train_y.append(target)
    print('No. of instances in training set:')
    print(len(train_X))
    print(len(train_y))
    print('')

    test_X, test_y = [], []
    for horse in testing_horses:
        features, target = compute_vector(horse)
        test_X.append(features)
        test_y.append(target)
    print('No. of instances in testing set:')
    print(len(test_X))
    print(len(test_y))
    print('')

    print('Create SVR object')
    svr = SVR(kernel='linear', C=1e3)

    print('Training SVR')
    svr.fit(train_X, train_y)

    print('Predicting')
    predicted = svr.predict(test_X)

    # First the model's own score(), then the stand-alone metrics on the
    # predictions computed above.
    _emit('Variance score:', svr.score(test_X, test_y))
    _emit('Mean absolute error:', mean_absolute_error(test_y, predicted))
    _emit('Explained variance:', explained_variance_score(test_y, predicted))
    _emit('Mean squared error:', mean_squared_error(test_y, predicted))
    _emit('R2 score:', r2_score(test_y, predicted))