def main():
    """Train, tune and evaluate a NaiveBayes classifier on the labeled data.

    Steps, in order:
      1. Load "data/train.csv" through a ``Preprocessor`` and encode labels.
      2. Split the encoded data into train and test sets.
      3. Scale both sets, then project them with PCA (27 components,
         no whitening).
      4. Fit a ``NaiveBayes`` classifier, optimize its hyperparameters and
         display the validation / cross-validation results.
      5. Print accuracy and f1-score on the held-out test set.
      6. Run ``make_new_predictions`` on "data/test.csv".

    Returns:
        None.
    """
    preprocessor = Preprocessor()
    preprocessor.import_labeled_data("data/train.csv")
    X_total, t_total = preprocessor.encode_labels(use_new_encoder=True)
    X_train, X_test, t_train, t_test = preprocessor.train_test_split(
        X_total, t_total)

    # Scale the data into separate *_scaled variables. The training split is
    # passed with use_new_scaler=True and the test split with
    # use_new_scaler=False; presumably the latter reuses the scaler built for
    # the training split -- confirm in Preprocessor.scale_data.
    X_train_scaled = preprocessor.scale_data(X_train, use_new_scaler=True)
    X_test_scaled = preprocessor.scale_data(X_test, use_new_scaler=False)

    # Apply PCA; from here on X_train / X_test hold the PCA-transformed data.
    # Same new/reuse flag pattern as for scaling above.
    X_train = preprocessor.apply_pca(
        X_train_scaled, use_new_pca=True, n_components=27, whiten=False)
    X_test = preprocessor.apply_pca(X_test_scaled, use_new_pca=False)

    clf = NaiveBayes(X_train, t_train)
    clf.optimize_hyperparameters()
    clf.display_general_validation_results()
    clf.display_cv_results()

    print("Test accuracy : {:.03f}".format(clf.get_accuracy(X_test, t_test)))
    print("Test f1-score : {:.03f}".format(clf.get_f1_score(X_test, t_test)))

    # The return value was previously bound to an unused local; the call is
    # kept for whatever side effects make_new_predictions has.
    make_new_predictions("data/test.csv", preprocessor, clf)
def run_all_classifiers():
    """Run every classifier once on a single shared preprocessing pipeline.

    This is a naive demonstration: all classifiers receive the same scaled,
    PCA-transformed data, so the results are not optimal (each classifier
    would need its own PCA transformation for that).
    """
    classifier_types = (
        LogisticRegression,
        Perceptron,
        SVM,
        MLP,
        RandomForest,
        NaiveBayes,
    )

    prep = Preprocessor()
    prep.import_labeled_data("data/train.csv")

    # Encode the labels, then split into train/test sets (the original
    # comment describes this as an 80/20 split).
    features, targets = prep.encode_labels(use_new_encoder=True)
    X_train, X_test, t_train, t_test = prep.train_test_split(features, targets)

    # Scale, replacing the unscaled arrays.
    X_train = prep.scale_data(X_train, use_new_scaler=True)
    X_test = prep.scale_data(X_test)

    # PCA, again replacing the previous arrays.
    X_train = prep.apply_pca(X_train, use_new_pca=True)
    X_test = prep.apply_pca(X_test)

    # Instantiate every classifier up front, before any of them is tuned.
    models = [make_model(X_train, t_train) for make_model in classifier_types]

    # Tune each model, show its validation results, then score it on the
    # held-out test set.
    for model in models:
        model.optimize_hyperparameters()
        model.display_general_validation_results()

        accuracy = model.get_accuracy(X_test, t_test)
        print("Test accuracy : {:.03f}".format(accuracy))

        f1 = model.get_f1_score(X_test, t_test)
        print("Test f1-score : {:.03f}".format(f1))