def load_pca():
    """Load the joint-angle CSV and run PCA on it.

    Reads ``./results/joint_angle_data.csv`` (comma-delimited), transposes
    it, and returns whatever ``run_pca`` produces for that matrix.

    Returns:
        The PCA matrix returned by ``run_pca``.
    """
    my_data = genfromtxt('./results/joint_angle_data.csv', delimiter=',')
    # run_pca expects the transposed layout (original note: "remember to do
    # a transpose on the input") -- presumably samples become columns;
    # TODO confirm against run_pca's contract.
    return run_pca(my_data.T)
def main():
    """Train (or load from cache) the total/basic models for h in 0..9.

    Requires at least two command-line arguments (argv[2] was an output
    prefix in an older, now-removed code path; the check is kept so the
    calling convention stays the same). For each h, first probes the
    pickled-model cache; on a cache miss it trains and persists both the
    total and basic models, skipping any h whose training fails.
    """
    if len(sys.argv) <= 2:
        print('not enough cmd args')
        # Exit nonzero on bad usage (the original used the site builtin
        # exit(), which is not guaranteed to exist and exits 0).
        sys.exit(1)

    Y, Yt = pca.run_pca()
    # Randomized copy of the data; only the removed "randomized" training
    # paths consumed it. The call is kept in case randomize_data has
    # side effects -- TODO confirm and drop if it is pure.
    Z, Zt = randomize_data(Y, Yt)

    for h in range(10):
        try:
            # Cache probe: if both pickled models load, skip training.
            pread(out_dir + 'pickled/models/total_model/total_model_10_' + str(h))
            pread(out_dir + 'pickled/models/basic_model/basic_model_10_' + str(h))
        except Exception:
            # Cache miss (missing or unreadable pickle): train and persist.
            try:
                F, S = train(Y, Yt, 10, h)
                pwrite(F, S, out_dir + 'pickled/models/total_model/total_model_10_' + str(h))
                F_basic, S_basic = train_basic(Y, Yt, 10, h)
                pwrite(F_basic, S_basic, out_dir + 'pickled/models/basic_model/basic_model_10_' + str(h))
            except Exception:
                # Training fails for some h (original message suggests a
                # singular matrix); report and continue with the next h.
                print('Singular matrix: ' + str(h))
    train_random()
# Benchmark the remaining classifiers on the full feature set.
for classifier in (run_naive_bayes, run_neural_network, run_perceptron,
                   run_random_forest, run_svm, run_xg_boost):
    classifier(x_train, x_test, y_train, y_test)
print(
    "\n-------------------------------------\nAccuracy with Voting in all 22 features:\n-------------------------------------"
)
run_voting(x_train, x_test, y_train, y_test)
print(
    "\n-------------------------------------\nAccuracies with dimensionality reduction using PCA (5 components):\n-------------------------------------"
)
# Reduce to 5 principal components, then rerun every classifier on the
# reduced feature matrices (same train/test labels).
new_x_train, new_x_test = run_pca(x_train, x_test, y_train, y_test, 5)
for classifier in (run_decision_tree, run_k_nearest_neighbour,
                   run_logistic_regression, run_naive_bayes,
                   run_neural_network, run_perceptron, run_random_forest,
                   run_svm, run_xg_boost):
    classifier(new_x_train, new_x_test, y_train, y_test)
print(
    "\n-------------------------------------\nAccuracy with Voting along with PCA:\n-------------------------------------"
)
run_voting(new_x_train, new_x_test, y_train, y_test)
col.remove('type') col = col[5:15] sc = StandardScaler() temp = sc.fit_transform(df_movie[col]) # df_movie[col] = temp df_standard = df_movie[list(df_movie.describe().columns)] return (df_movie, df_standard) def classify(row): if row['imdbRating'] >= 0 and row['imdbRating'] < 4: return 0 elif row['imdbRating'] >= 4 and row['imdbRating'] < 7: return 1 elif row['imdbRating'] >= 7 and row['imdbRating'] <= 10: return 2 if __name__ == '__main__': df_movie, df_standard = data_prepocessing() run_pca(df_standard, df_movie) df_knn = df_movie df_knn["class"] = df_knn.apply(classify, axis=1) run_knn(df_knn) run_logistic_regression() run_xgboost_cornell() run_xgboost_imdb(df_knn)