def main(): data_set = "../income-data/income.train.txt" X, Y, features = get_binary_features(data_set) print (X.shape) data_set = "../income-data/income.dev.txt" X_dev, Y_dev, features = get_binary_features(data_set, features) print (X_dev.shape) perceptron = Perceptron(feature_size=len(X[0,:])) # print ("Before training:") # print(perceptron.test(X, Y)) # perceptron.train(X,Y) # print("After (batch) training:") # print(perceptron.test(X, Y)) perceptron.reset() for j in range(1): for i in range(len(X[:,0])): perceptron.train_online(X[i,:], Y[i]) print ("After single training: ") print(perceptron.test(X,Y)) print ("Average") perceptron.reset() perceptron.average_train(X, Y, maxIter=5) print ("Naive average (with maximum iterations)") perceptron.reset() perceptron.naive_average_train(X, Y, maxIter=10) print(perceptron.test(X,Y)) print ("MIRA") mira = Perceptron(feature_size=len(X[0,:]), mira_aggro=0.0) for j in range(10): for i in range(len(X[:,0])): mira.train_mira(X[i,:], Y[i]) print(mira.test(X,Y)) print ("MIRA Average") mira.reset() mira.train_mira_average(X,Y, maxIter=5) print(mira.test(X,Y))
def main(): data_set = "../income-data/income.train.txt" X, Y, features = get_binary_features(data_set) print(X.shape) data_set = "../income-data/income.dev.txt" X_dev, Y_dev, features = get_binary_features(data_set, features) print(X_dev.shape) # data_set = "../income-data/income.train.txt" # X, Y, features = get_numbered_features(data_set) # print X.shape # data_set = "../income-data/income.dev.txt" # X_dev, Y_dev, features = get_numbered_features(data_set, features) # print X_dev.shape # data_set = "../income-data/income.train.txt" # X, Y, features = get_numbered_binary_features(data_set) # print X.shape # data_set = "../income-data/income.dev.txt" # X_dev, Y_dev, features = get_numbered_binary_features(data_set, features) # print X_dev.shape # data_set = "../income-data/income.train.txt" # X, Y, features = get_binned_features(data_set) # print X.shape # data_set = "../income-data/income.dev.txt" # X_dev, Y_dev, features = get_binned_features(data_set, features) # print X_dev.shape # data_set = "../income-data/income.train.txt" # X, Y, features = get_num_ed_features(data_set) # print X.shape # data_set = "../income-data/income.dev.txt" # X_dev, Y_dev, features = get_num_ed_features(data_set, features) # print X_dev.shape print("---------------------------------------------------------------") print("---------------------------------------------------------------") perceptron = Perceptron(feature_size=len(X[0, :])) perceptron.reset() epochs = 5 count = 0 max_score = 0 max_score_epoch = 0 err_rate = 0 best_err_rate = 0 err_rate_list = np.array([]) print("START PERCEPTRON") for j in range(epochs): print("EPOCH ", j + 1) for i in range(len(X[:, 0])): perceptron.train_online(X[i, :], Y[i]) count += 1 if count % 1000 == 0: score = perceptron.test(X_dev, Y_dev) err_rate = (1.0 - score) * 100 err_rate_list = np.append(err_rate_list, err_rate) epoch_v = (1.0 * j) + ((1.0 * i) / len(X[:, 0])) print("Epoch: ", epoch_v, "Score: ", score, "Error Rate: ", err_rate) if max_score < score: max_score = score best_err_rate = (1.0 - max_score) * 100 max_score_epoch = (1.0 * j) + ((1.0 * i) / len(X[:, 0])) avg_err_rate = np.average(err_rate_list) print("Average Error Rate: ", avg_err_rate) print("Max Score: ", max_score) print("Best Error Rate: ", best_err_rate) print("At Epoch: ", max_score_epoch) print("END PERCEPTRON") print("---------------------------------------------------------------") print("START NAIVE AVERAGE PERCEPTRON") perceptron.reset() na_score = 0 na_err_rate = 0 perceptron.naive_average_train(X, Y, maxIter=5) na_score = perceptron.test(X_dev, Y_dev) * 100 na_err_rate = 100 - na_score print("Score: ", na_score, "Error Rate: ", na_err_rate) print("END NAIVE AVERAGE PERCEPTRON") print("---------------------------------------------------------------") print("START SMART AVERAGE PERCEPTRON") perceptron.reset() sa_score = 0 sa_err_rate = 0 perceptron.average_train(X, Y, maxIter=5) sa_score = perceptron.test(X_dev, Y_dev) * 100 sa_err_rate = 100 - sa_score print("Score: ", sa_score, "Error Rate: ", sa_err_rate) print("END SMART AVERAGE PERCEPTRON") print("---------------------------------------------------------------") print("START MIRA") perceptron.reset() mira_score = 0 mira_err_rate = 0 mira = Perceptron(feature_size=len(X[0, :]), mira_aggro=0.0) for j in range(5): for i in range(len(X[:, 0])): mira.train_mira(X[i, :], Y[i]) mira_score = mira.test(X_dev, Y_dev) * 100 mira_err_rate = 100 - mira_score print("Score: ", mira_score, "Error Rate: ", mira_err_rate) print("END MIRA") # print("---------------------------------------------------------------") # # print("START AVERAGE MIRA") # # mira.reset() # # a_mira_score = 0 # a_mira_err_rate = 0 # # mira.train_mira_average(X, Y, maxIter=5) # a_mira_score = mira.test(X_dev, Y_dev) * 100 # a_mira_err_rate = 100 - a_mira_score # print("Score: ", a_mira_score, "Error Rate: ", a_mira_err_rate) # # print("END AVERAGE MIRA") print("---------------------------------------------------------------") print("---------------------------------------------------------------")