Exemplo n.º 1
0
def train_model(model_name, X_train, Y_train, X_val, Y_val):
    """
    Builds the supervised classifier selected by ``model_name``, trains it on
    the training data provided, and scores it using the validation dataset.
    Both scores are printed to stdout.

    Param:
        - model_name: a string naming the model type; one of 'svm',
          'random_forest', 'neural_network' or 'knn'
        - Train data:
            - X_train: training inputs (for 'neural_network',
              ``X_train.shape[1]`` is used as the network input size)
            - Y_train: training targets
        - Validation data:
            - X_val
            - Y_val

    Return:
        - model: the trained supervised classifier, to be used for testing
        - if ``model_name`` is not one of the supported names, nothing is
          trained and the string "Error: Model not yet implemented..." is
          returned instead (kept as-is for backward compatibility, so
          callers must check for it before using the result as a model)
    """

    if model_name == 'svm':
        model = SVM()
    elif model_name == "random_forest":
        max_depth = 2
        model = RandomForest(max_depth)
    elif model_name == "neural_network":
        # The input width follows the data; the other sizes are fixed here.
        in_size = X_train.shape[1]
        hidden_size = 30
        out_size = 2
        model = NeuralNetwork(in_size, hidden_size, out_size)
    elif model_name == "knn":
        n_neighbors = 50
        model = KNearestNeighbors(n_neighbors)
    else:
        # Unknown model type: report through the return value, not an
        # exception, because existing callers rely on this behavior.
        return "Error: Model not yet implemented..."

    print("Training " + model_name + "...")

    # train() is expected to return the training score, which is printed
    # below alongside the validation score.
    train_score = model.train(X_train, Y_train)
    valid_score = model.score(X_val, Y_val)

    print("Training Accuracy: %s" % train_score)
    print("Validation Accuracy: %s" % valid_score)

    return model
Exemplo n.º 2
0
    knn_misclassification_rate = np.zeros(3)
    for index, k in enumerate([1, 5, 10]):
        knn = sklearn.neighbors.KNeighborsClassifier(n_neighbors=k)
        knn.fit(X_train, y_train)
        knn_result = knn.predict(X_test)
        knn_misclassification_rate[index] = np.mean(knn_result != y_test)
    plt.figure()
    plt.plot(1 / np.array([1, 5, 10]), knn_misclassification_rate, "-bo")
    plt.suptitle("Misclassification rate v.s. 1/k of KNN")
    plt.show()
    print(knn_misclassification_rate)

    # Question 5 #

    k_nearest_neighbors = KNearestNeighbors(1)
    k_nearest_neighbors.fit(X_train, y_train)
    knn_result = k_nearest_neighbors.predict(X_test)
    knn_naive_misclassification_rate = np.mean(knn_result != y_test)
    print(knn_naive_misclassification_rate)

    # Question 7 #
    w_knn_misclassification_rate = np.zeros(3)
    for index, k in enumerate([1, 5, 10]):
        knn = WeightedKNearestNeighbors(n_neighbors=k)
        knn.fit(X_train, y_train)
        knn_result = knn.predict(X_test)
        w_knn_misclassification_rate[index] = np.mean(knn_result != y_test)
    plt.figure()
    plt.plot(1 / np.array([1, 5, 10]), w_knn_misclassification_rate, "-bo")
    plt.suptitle("Misclassification rate v.s. 1/k of weighted KNN")