Example #1
from typing import Dict, Tuple

import numpy as np
from sklearn.model_selection import StratifiedKFold

# KNearestNeighbors is the project's own k-NN implementation (not part of scikit-learn).


def test_weighted_vs_majority(x_data, class_vector) -> Tuple[Dict, Dict]:
    """
    Compare 10-fold cross-validated k-NN error rates for majority voting and
    distance-weighted voting with n_neighbors = 1..50.

    :return: Returns (error_rates_majority, error_rates_weighted), each mapping
             n_neighbors to the average classification error rate.
    """
    kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
    average_error_rates: Dict[Tuple[str, int], float] = {}

    for weight_method in KNearestNeighbors.WEIGHT_METHODS:
        for n_neighbors in range(1, 51):
            classification_errors = []
            for train_indexes, test_indexes in kfold.split(x_data, class_vector):
                knn = KNearestNeighbors(x_data[train_indexes],
                                        class_vector[train_indexes],
                                        weight_method=weight_method,
                                        n_neighbors=n_neighbors)
                predictions = knn.classify(x_data[test_indexes])
                n_errors = 0

                # Count misclassified test samples in this fold.
                for predicted, actual in zip(predictions, class_vector[test_indexes]):
                    if predicted != actual:
                        n_errors += 1

                classification_errors.append(n_errors / len(predictions))

            average_error_rates[(weight_method, n_neighbors)] = np.average(classification_errors)

    def error_rates_method(method):
        return {n_n: err_rate for (method_, n_n), err_rate in average_error_rates.items() if method_ == method}

    return error_rates_method('majority'), error_rates_method('weighted')
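
A possible way to call this function and read off the best n_neighbors per voting scheme; x_data and class_vector stand for NumPy arrays prepared elsewhere, and the variable names below are illustrative only:

error_rates_majority, error_rates_weighted = test_weighted_vs_majority(x_data, class_vector)

# Smallest average cross-validation error for each voting scheme.
best_k_majority = min(error_rates_majority, key=error_rates_majority.get)
best_k_weighted = min(error_rates_weighted, key=error_rates_weighted.get)
print(f"majority voting: k={best_k_majority}, error={error_rates_majority[best_k_majority]:.3f}")
print(f"weighted voting: k={best_k_weighted}, error={error_rates_weighted[best_k_weighted]:.3f}")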
Example #2
def test_knn(self):
    x_data = np.array([[1, 2, 3], [2, 3, 4], [17, 18, 19]])
    y = [1, 1, 2]
    knn = KNearestNeighbors(x_data,
                            y,
                            n_neighbors=2,
                            weight_method='weighted',
                            distance_method='euclidean')
    # The query point is closest to the third training sample, so class 2 is expected.
    assert knn.classify(np.array([[18, 19, 20]]))[0] == 2
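
Both examples rely on a custom KNearestNeighbors class whose definition is not shown. Below is a minimal sketch of the interface they assume (constructor arguments, WEIGHT_METHODS, classify), using inverse-distance weighting for 'weighted' voting and plain vote counting for 'majority'; the project's actual implementation may differ.

import numpy as np


class KNearestNeighbors:
    """Minimal k-NN sketch matching the interface used in the examples above."""

    WEIGHT_METHODS = ('majority', 'weighted')

    def __init__(self, x_train, y_train, n_neighbors=5,
                 weight_method='majority', distance_method='euclidean'):
        self.x_train = np.asarray(x_train, dtype=float)
        self.y_train = np.asarray(y_train)
        self.n_neighbors = n_neighbors
        self.weight_method = weight_method
        self.distance_method = distance_method  # only 'euclidean' is sketched here

    def classify(self, x_test):
        predictions = []
        for point in np.asarray(x_test, dtype=float):
            # Euclidean distance from the query point to every training point.
            distances = np.linalg.norm(self.x_train - point, axis=1)
            nearest = np.argsort(distances)[:self.n_neighbors]

            votes = {}
            for idx in nearest:
                label = self.y_train[idx]
                if self.weight_method == 'weighted':
                    # Inverse-distance weighting; the epsilon avoids division by zero.
                    weight = 1.0 / (distances[idx] + 1e-12)
                else:  # 'majority': every neighbour casts one vote
                    weight = 1.0
                votes[label] = votes.get(label, 0.0) + weight

            predictions.append(max(votes, key=votes.get))
        return predictions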