def main():
    """Load the bupa data, split it into train/test sets, run AdaBoost with
    a decision stump on every split, and collect per-round errors.

    NOTE(review): this variant collects the per-split error curves but never
    averages or saves them -- it looks like a truncated/superseded version of
    the complete main() below; confirm whether it should be removed.
    """
    classifier = AdaBoost(DecisionStump)
    num_sets = 50      # number of train/test splits to generate
    T = 100            # number of boosting iterations
    percentage = 0.9   # train fraction used by split_data
    all_errors_train = []
    all_errors_test = []

    # Split the data into `num_sets` train/test files on disk.
    split_data(percentage, num_sets)

    # Run AdaBoost (T boosting iterations) on every split and record the
    # per-round train/test error curves.
    for i in range(num_sets):
        data_split_train = './data/bupa_train' + str(i) + ".txt"
        data_split_test = './data/' + "bupa_test" + str(i) + ".txt"
        X_train, Y_train = load_data(data_split_train)
        X_test, Y_test = load_data(data_split_test)
        score_train, score_test = classifier.run_adaboost(X_train, Y_train, T, X_test)
        error_train = calculate_error(T, score_train, Y_train)
        error_test = calculate_error(T, score_test, Y_test)
        all_errors_train.append(error_train)
        all_errors_test.append(error_test)
def main():
    """Split the bupa data into train/test sets, run AdaBoost with a decision
    stump on each split, average the per-round errors across all splits, and
    save the averaged curves plus an empirical score on the full dataset.
    """
    classifier = AdaBoost(DecisionStump)
    num_sets = 50      # number of train/test splits to generate
    T = 100            # number of boosting iterations
    percentage = 0.9   # train fraction used by split_data
    all_errors_train = []
    all_errors_test = []

    # Split the data into `num_sets` train/test files on disk.
    split_data(percentage, num_sets)

    # Run AdaBoost on every split and record the per-round error curves.
    for i in range(num_sets):
        data_split_train = './data/bupa_train' + str(i) + '.txt'
        data_split_test = './data/bupa_test' + str(i) + '.txt'
        X_train, Y_train = load_data(data_split_train)
        X_test, Y_test = load_data(data_split_test)
        score_train, score_test = classifier.run_adaboost(X_train, Y_train, T, X_test)
        error_train = calculate_error(T, score_train, Y_train)
        error_test = calculate_error(T, score_test, Y_test)
        all_errors_train.append(error_train)
        all_errors_test.append(error_test)

    # Average the error of boosting round j over all splits.
    aver_error_train = []
    aver_error_test = []
    for j in range(T):
        aver_error_train.append(
            sum(all_errors_train[i][j] for i in range(num_sets)) / num_sets)
        aver_error_test.append(
            sum(all_errors_test[i][j] for i in range(num_sets)) / num_sets)

    save_result_final(aver_error_train, 'train')
    save_result_final(aver_error_test, 'test')

    # Optional empirical run on the entire (unsplit) dataset.
    dataset_here = './data/bupa.data'
    X_all, Y_all = load_data(dataset_here)
    score_optional = classifier.run_adaboost(X_all, Y_all, T, None, True)
    save_result_final(score_optional, 'empirical')
def main(): ''' Load data, split data, creates adaboost algorithm with decision stump, calculates errors, save final file. Since this is a binary classifier, we will do for each of the 4 networks, one at time''' classification = [] ada_folder = OUTPUT_FOLDER + 'adaboost/' if not os.path.exists(ada_folder): os.makedirs(ada_folder) output_file = ada_folder + 'results.out' with open(output_file, "w") as f: f.write("# ADABOOST RESULTS, TRAIN/TEST FRACTION: " + str(PERCENTAGE) + "\n") f. write("# Net Norm Set OL? Accu. Train Accu Test\n") # chose classifier classifier = AdaBoost(DecisionStump) # for each normalization: for norm in NORM: # for each set for number in range(1, NUM_SETS+1): ''' with with_outlier ''' with_outlier = True # get input and output file paths input_train = get_input_data('train', number, norm, with_outlier) input_test = get_input_data('test', number, norm, with_outlier) # for each network type: for net_name in NET_NAMES: # get data X_train, Y_train = one_against_all.load_data(input_train, net_name) X_test, Y_test = one_against_all.load_data(input_test, net_name) print 'Calculating adaboost for net ' + net_name + ' with normalization ' + norm + ' and set ' + str(number) score_train, score_test = classifier.run_adaboost(X_train, Y_train, T, X_test) error_train = calculate_error(T, score_train, Y_train) error_test = calculate_error(T, score_test, Y_test) error_train_total = sum(error_train)/len(error_train) error_test_total = sum(error_test)/len(error_test) save_result_final(error_train_total, error_test_total, output_file, net_name, norm, number, with_outlier) classification.append(str(round(error_test_total,3)) +', ' + str(norm) + ', ' + str(number) + ', ' + str(with_outlier)[0] + '\n') ''' with no outlier ''' with_outlier = False # get input and output file paths input_train = get_input_data('train', number, norm, with_outlier) input_test = get_input_data('test', number, norm, with_outlier) # for each network type: for net_name in NET_NAMES: # get 
data X_train, Y_train = one_against_all.load_data(input_train, net_name) X_test, Y_test = one_against_all.load_data(input_test, net_name) score_train, score_test = classifier.run_adaboost(X_train, Y_train, T, X_test) error_train = calculate_error(T, score_train, Y_train) error_test = calculate_error(T, score_test, Y_test) error_train_total = sum(error_train)/len(error_train) error_test_total = sum(error_test)/len(error_test) save_result_final(error_train_total, error_test_total, output_file, net_name, norm, number, with_outlier) classification.append(str(round(error_test_total,3)) +', ' + str(norm) + ', ' + str(number) + ', ' + str(with_outlier)[0] + '\n') #find best classfiers classification.sort() with open(output_file + 'good_classification', "w") as f: f.write("\n\n\nClassification\n\n") for feat in classification: f.write(feat + '\n') f.write("\n") print 'Results saved at ' + ada_folder print 'Done!!!'