import numpy as np
from operator import itemgetter

# NOTE: the model builders (build_simple_classifier, build_mlp, build_svm),
# the test drivers (test_nn, test_knn, test_rf, model_predict), and the data
# and plotting helpers (setup_gpu, get_data, plot_history, plot_nn_heatmap,
# plot_all_results, plot_accuracy_by_date_subplot) are assumed to be defined
# elsewhere in this project.


def test_nn_epochs(data, verbose=False):
    """
    Tests both the simple classifier and the multi-layer perceptron for
    overfitting by running each for 1000 epochs while holding all other
    parameters constant.
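
    Example (a minimal sketch; assumes `data` comes from get_data()):
        >>> data = get_data(data_choice="single_date")
        >>> test_nn_epochs(data, verbose=True)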
    """
    params = {"epochs": [1000], "batch_size": [500]}

    # Build the models
    simple_classifier = build_simple_classifier(data["X_train"])
    mlp = build_mlp(data["X_train"])

    simple_classifier_results = test_nn(
        simple_classifier,
        data,
        'simple_classifier',
        params,
        verbose=verbose,
    )
    mlp_results = test_nn(
        mlp,
        data,
        'mlp',
        params,
        verbose=verbose,
    )

    # Visualize the results
    plot_history(simple_classifier_results, save_fig=True)
    plot_history(mlp_results, save_fig=True)


def test_nn_test_params(data,
                        model_params=None,
                        test_params=None,
                        verbose=False):
    """
    Tests both the simple classifier and the multi-layer perceptron over a grid
    of epoch counts and batch sizes. The learning rate, optimizer, l2-norm
    penalty, and dropout are held constant in this test set.
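
    Example (a minimal sketch; assumes `data` comes from get_data()):
        >>> data = get_data(data_choice="single_date")
        >>> sc_results, mlp_results = test_nn_test_params(
        ...     data,
        ...     test_params={"epochs": [10, 50], "batch_size": [50, 100]})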
    """
    if not model_params:
        model_params = {
            "learning_rates": 0.001,
            "optimizers": "adam",
            "regularizer": None,
        }
    if not test_params:
        test_params = {
            "epochs": [1, 5, 10, 50, 100],
            "batch_size": [1, 5, 10, 50, 100]
        }

    # Build the models
    simple_classifier = build_simple_classifier(
        data["X_train"],
        learning_rate=model_params["learning_rates"],
        optimizer=model_params["optimizers"])
    mlp = build_mlp(data["X_train"],
                    learning_rate=model_params["learning_rates"],
                    optimizer=model_params["optimizers"])

    # Test the model
    simple_classifier_results = test_nn(
        simple_classifier,
        data,
        'simple_classifier',
        test_params,
        learning_rate=model_params["learning_rates"],
        optimizer=model_params["optimizers"],
        verbose=verbose)
    mlp_results = test_nn(mlp,
                          data,
                          'mlp',
                          test_params,
                          learning_rate=model_params["learning_rates"],
                          optimizer=model_params["optimizers"],
                          verbose=verbose)

    plot_nn_heatmap(simple_classifier_results, plot_type="test", save_fig=True)
    plot_nn_heatmap(mlp_results, plot_type="test", save_fig=True)
    return simple_classifier_results, mlp_results


def main():
    # Set up GPU/CPU
    setup_gpu(gpu=False)

    # Get the input data
    data = get_data(data_choice="single_date")

    # Set up parameters
    epochs = [150]
    batch_size = [300]
    dropout = True
    regularizer = 0.0001
    optimizer = "adam"
    learning_rate = 0.001
    neighbours = [100]
    estimators = [100]
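    # neighbours and estimators are the hyperparameter grids handed to
    # test_knn (KNN) and test_rf (random forest) below.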

    # Test the simple classifier parameters
    simple_classifier = build_simple_classifier(X_train=data["X_train"],
                                                learning_rate=learning_rate,
                                                optimizer=optimizer,
                                                regularizer=regularizer,
                                                dropout=dropout)
    simple_classifier_results = test_nn(
        model=simple_classifier,
        data=data,
        model_name='simple_classifier',
        params={
            "epochs": epochs,
            "batch_size": batch_size
        },
        optimizer=optimizer,
        learning_rate=learning_rate,
        regularization=regularizer,
        dropout=dropout,
        verbose=False,
    )

    # Test MLP
    mlp = build_mlp(X_train=data["X_train"],
                    learning_rate=learning_rate,
                    optimizer=optimizer,
                    regularizer=regularizer,
                    dropout=dropout)
    mlp_results = test_nn(
        model=mlp,
        data=data,
        model_name='mlp',
        params={
            "epochs": epochs,
            "batch_size": batch_size
        },
        optimizer=optimizer,
        learning_rate=learning_rate,
        regularization=regularizer,
        dropout=dropout,
        verbose=False,
    )

    # Test SVM
    svm_classifier = build_svm()
    svm_results = model_predict(
        model_name='svm',
        model=svm_classifier,
        data=data,
    )

    # Test KNN
    knn_results = test_knn(data=data, neighbours=neighbours, verbose=False)

    # Test RF
    rf_results = test_rf(data=data, estimators=estimators, verbose=False)

    # Compare Classifiers
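    # model_predict returns a single result dict, so svm_results is wrapped in
    # a list to match the list-of-dicts shape of the other results.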
    all_results = [
        simple_classifier_results, mlp_results, [svm_results], knn_results,
        rf_results
    ]

    # Print accuracy for each classifier
    for result in all_results:
        print("{} ACCURACY: {}".format(
            result[0]["name"].upper().replace("_", " "),
            result[0]["accuracy"]))

    # Plot results for each classifier as a bar graph
    plot_all_results(all_results, save_fig=True)


def test_by_date(params=None, verbose=False):
    """
    Determines the accuracy of each classifier as a function of the date.
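
    Example (a minimal sketch; the keyword structure mirrors the defaults
    below):
        >>> best = test_by_date(params={
        ...     "nn_params": {"epochs": [50], "batch_size": [300]},
        ...     "neighbours": [5],
        ...     "estimators": [50],
        ... })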
    """
    # Get the input data
    data_list = get_data(data_choice="by_date")

    # Set up parameters
    if not params:
        params = {
            "nn_params": {
                "epochs": [150],
                "batch_size": [300]
            },
            "neighbours": [5, 10],
            "estimators": [10, 50, 100]
        }

    # Track the best accuracy (in percent) for each classifier on each date
    best_results = {
        "simple_classifier": [],
        "mlp": [],
        "svm": [],
        "knn": [],
        "rf": []
    }

    dates = []
    for data in data_list:
        # Skip dates where every training label is identical
        if len(np.unique(data["y_train"])) == 1:
            continue

        # Test the simple classifier parameters
        simple_classifier = build_simple_classifier(data["X_train"])
        simple_classifier_results = test_nn(
            simple_classifier,
            data,
            'simple_classifier',
            params["nn_params"],
            verbose=verbose,
        )
        # Test MLP
        mlp = build_mlp(data["X_train"])
        mlp_results = test_nn(mlp,
                              data,
                              'mlp',
                              params["nn_params"],
                              verbose=verbose)

        # Test SVM
        svm_classifier = build_svm()
        svm_results = model_predict(
            'svm',
            svm_classifier,
            data,
        )

        # Test KNN
        knn_results = test_knn(data, params["neighbours"], verbose=verbose)

        # Test RF
        rf_results = test_rf(data, params["estimators"], verbose=verbose)

        # Visualize results
        all_results = [
            simple_classifier_results, mlp_results, [svm_results], knn_results,
            rf_results
        ]
        # For each classifier, keep only its best result for this date
        for result_list in all_results:
            # Sort according to accuracy
            sorted_list = sorted(result_list,
                                 key=itemgetter("accuracy"),
                                 reverse=True)
            result = sorted_list[0]
            best_results[result["name"]].append(result["accuracy"] * 100)

        dates.append(data["date"])
    plot_accuracy_by_date_subplot(dates, best_results, save_fig=True)
    return best_results


def test_nn_reg(data, model_params=None, test_params=None, verbose=False):
    """
    Tests both the simple classifier and the multi-layer perceptron over a grid
    of l2-norm penalties, with and without dropout. The learning rate,
    optimizer, number of epochs, and batch size are held constant in this
    test set.
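
    Example (a minimal sketch; assumes `data` comes from get_data()):
        >>> simple, mlp = test_nn_reg(data, model_params={
        ...     "learning_rates": 0.01, "optimizers": "adam",
        ...     "regularizer": [0.01, 0.001], "dropout": [True, False]})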
    """
    if not model_params:
        model_params = {
            "learning_rates": 0.01,
            "optimizers": "adam",
            "regularizer": [0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001],
            "dropout": [True, False]
        }

    if not test_params:
        test_params = {"epochs": [100], "batch_size": [500]}

    all_simple_results = []
    all_mlp_results = []
    for regularizer in model_params["regularizer"]:
        for dropout in model_params["dropout"]:
            # Build the models
            simple_classifier = build_simple_classifier(
                data["X_train"],
                learning_rate=model_params["learning_rates"],
                optimizer=model_params["optimizers"],
                regularizer=regularizer,
                dropout=dropout,
            )
            mlp = build_mlp(data["X_train"],
                            learning_rate=model_params["learning_rates"],
                            optimizer=model_params["optimizers"],
                            regularizer=regularizer,
                            dropout=dropout)

            # Test the model
            if verbose:
                print("SIMPLE CLASSIFIER - REGULARIZER {}, DROPOUT {}".format(
                    regularizer, dropout))
            simple_classifier_results = test_nn(
                simple_classifier,
                data,
                'simple_classifier',
                test_params,
                optimizer=model_params["optimizers"],
                learning_rate=model_params["learning_rates"],
                regularization=regularizer,
                dropout=dropout,
                verbose=verbose)
            if verbose:
                print("MLP - REGULARIZER {}, DROPOUT {}".format(
                    regularizer, dropout))
            mlp_results = test_nn(
                mlp,
                data,
                'mlp',
                test_params,
                optimizer=model_params["optimizers"],
                learning_rate=model_params["learning_rates"],
                regularization=regularizer,
                dropout=dropout,
                verbose=verbose)

            all_simple_results += simple_classifier_results
            all_mlp_results += mlp_results

    plot_nn_heatmap(all_simple_results,
                    plot_type="regularization",
                    save_fig=True)
    plot_nn_heatmap(all_mlp_results, plot_type="regularization", save_fig=True)

    return all_simple_results, all_mlp_results


def test_nn_optimizer(data,
                      model_params=None,
                      test_params=None,
                      verbose=False):
    """
    Tests both the simple classifier and the multi-layer perceptron over a grid
    of optimizers and learning rates. The number of epochs, batch size, and
    regularizer are held constant in this test set.
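
    Example (a minimal sketch; assumes `data` comes from get_data()):
        >>> simple, mlp = test_nn_optimizer(data, model_params={
        ...     "learning_rates": [0.01, 0.001],
        ...     "optimizers": ["adam", "sgd"],
        ...     "regularizer": None})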
    """
    if not model_params:
        model_params = {
            "learning_rates": [0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001],
            "optimizers": [
                "adam", "sgd", "rmsprop", "adagrad", "adadelta", "adamax",
                "nadam"
            ],
            "regularizer":
            None,
        }
    if not test_params:
        test_params = {"epochs": [100], "batch_size": [500]}

    all_simple_results = []
    all_mlp_results = []
    for learning_rate in model_params["learning_rates"]:
        for optimizer in model_params["optimizers"]:
            # Build the models
            simple_classifier = build_simple_classifier(
                data["X_train"],
                learning_rate=learning_rate,
                optimizer=optimizer)
            mlp = build_mlp(data["X_train"],
                            learning_rate=learning_rate,
                            optimizer=optimizer)

            # Test the model
            if verbose:
                print("SIMPLE CLASSIFIER - LEARNING_RATE {}, OPTIMIZER {}".
                      format(learning_rate, optimizer))
            simple_classifier_results = test_nn(simple_classifier,
                                                data,
                                                'simple_classifier',
                                                test_params,
                                                optimizer=optimizer,
                                                learning_rate=learning_rate,
                                                verbose=verbose)
            if verbose:
                print("MLP - LEARNING_RATE {}, OPTIMIZER {}".format(
                    learning_rate, optimizer))
            mlp_results = test_nn(mlp,
                                  data,
                                  'mlp',
                                  test_params,
                                  optimizer=optimizer,
                                  learning_rate=learning_rate,
                                  verbose=verbose)

            all_simple_results += simple_classifier_results
            all_mlp_results += mlp_results

    plot_nn_heatmap(all_simple_results, plot_type="optimizer", save_fig=True)
    plot_nn_heatmap(all_mlp_results, plot_type="optimizer", save_fig=True)

    return all_simple_results, all_mlp_results
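

# Conventional entry point; a minimal sketch assuming this module is run
# directly (main() above performs the full classifier comparison).
if __name__ == "__main__":
    main()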