Example #1
File: main.py  Project: rabyj/IFT712
def main():
    """Do something with the project code! Have fun :) """

    preprocessor = Preprocessor()
    preprocessor.import_labeled_data("data/train.csv")

    # encode labels and split into train/test sets
    X_total, t_total = preprocessor.encode_labels(use_new_encoder=True)
    X_train, X_test, t_train, t_test = preprocessor.train_test_split(
        X_total, t_total)

    # scale the data (the scaler is fitted on the training set only)
    X_train_scaled = preprocessor.scale_data(X_train, use_new_scaler=True)
    X_test_scaled = preprocessor.scale_data(X_test, use_new_scaler=False)

    # apply PCA
    X_train = preprocessor.apply_pca(X_train_scaled,
                                     use_new_pca=True,
                                     n_components=27,
                                     whiten=False)
    X_test = preprocessor.apply_pca(X_test_scaled, use_new_pca=False)

    # train a naive Bayes classifier and report validation and test metrics
    clf = NaiveBayes(X_train, t_train)
    clf.optimize_hyperparameters()
    clf.display_general_validation_results()
    clf.display_cv_results()
    print("Test accuracy : {:.03f}".format(clf.get_accuracy(X_test, t_test)))
    print("Test f1-score : {:.03f}".format(clf.get_f1_score(X_test, t_test)))

    # predict labels for the unlabeled test set with the fitted preprocessor and classifier
    label_predictions = make_new_predictions("data/test.csv", preprocessor,
                                             clf)
Example #2
File: main.py  Project: rabyj/IFT712
def run_all_classifiers():
    """Naive demonstration of all classifiers on default settings.

    Does not give optimal solutions because different PCA transformations are needed.
    """
    preprocessor = Preprocessor()
    preprocessor.import_labeled_data("data/train.csv")

    # encode labels and split 80/20 train/test
    X_total, t_total = preprocessor.encode_labels(use_new_encoder=True)
    X_train, X_test, t_train, t_test = preprocessor.train_test_split(
        X_total, t_total)

    # scale data, overwriting the unscaled arrays (the scaler is fitted on the training set only)
    X_train = preprocessor.scale_data(X_train, use_new_scaler=True)
    X_test = preprocessor.scale_data(X_test)

    # apply PCA
    X_train = preprocessor.apply_pca(X_train, use_new_pca=True)
    X_test = preprocessor.apply_pca(X_test)

    # init classifiers
    clfs = [
        clf(X_train, t_train) for clf in
        [LogisticRegression, Perceptron, SVM, MLP, RandomForest, NaiveBayes]
    ]

    # training and cross-validation on default hyperparameters
    for clf in clfs:
        clf.optimize_hyperparameters()
        clf.display_general_validation_results()
        print("Test accuracy : {:.03f}".format(clf.get_accuracy(
            X_test, t_test)))
        print("Test f1-score : {:.03f}".format(clf.get_f1_score(
            X_test, t_test)))
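
The "run everything on defaults" loop above can likewise be approximated directly with scikit-learn estimators. The sketch below assumes X_train, X_test, t_train, t_test were prepared as in the sketch after Example #1, replaces the project's wrapper classes with their closest scikit-learn counterparts, and uses cross_val_score in place of the wrappers' built-in cross-validation reporting.

# Sketch only: one loop over several default-configured scikit-learn classifiers.
# Assumes X_train, X_test, t_train, t_test exist as in the previous sketch.
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC

classifiers = [
    LogisticRegression(max_iter=1000),
    Perceptron(),
    SVC(),
    MLPClassifier(max_iter=1000),
    RandomForestClassifier(),
    GaussianNB(),
]

for clf in classifiers:
    # 5-fold cross-validation on the training set, then a single test evaluation
    cv_scores = cross_val_score(clf, X_train, t_train, cv=5)
    clf.fit(X_train, t_train)
    t_pred = clf.predict(X_test)
    print(type(clf).__name__)
    print("  CV accuracy   : {:.03f}".format(cv_scores.mean()))
    print("  Test accuracy : {:.03f}".format(accuracy_score(t_test, t_pred)))
    print("  Test f1-score : {:.03f}".format(f1_score(t_test, t_pred, average="macro")))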