def test_classifier(data):
    """Smoke-test the estimator contract: defaults, fitted attributes, output shape."""
    X, y = data
    clf = KNNAlgorithmM()
    # default ensemble of neighbour counts
    assert clf.k_neighbours == (3, 5, 7)
    # fitting must expose the sklearn-style learned attributes
    clf.fit(X, y)
    for learned_attr in ('classes_', 'X_', 'y_'):
        assert hasattr(clf, learned_attr)
    # one predicted label per input row
    assert clf.predict(X).shape == (X.shape[0],)
def test_main_class_probability():
    """_classic_knn should reproduce hand-computed main-class probabilities."""
    decisions = np.array([0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1])
    # rows: test objects; columns: k = 3, 5, 7
    expected = np.array([[1 / 3, 2 / 5, 3 / 7],
                         [1 / 3, 3 / 5, 4 / 7],
                         [1, 3 / 5, 5 / 7]])
    model = KNNAlgorithmM()
    model.y_ = decisions
    # pre-sorted neighbour indices for three test objects
    sorted_distance = np.array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
                                [5, 6, 1, 2, 3, 0, 7, 9, 8, 10, 11, 4],
                                [3, 7, 8, 9, 10, 11, 2, 1, 4, 0, 6, 5]])
    probabilities = np.zeros(shape=(3, 3))
    for row in range(3):
        for col, k in enumerate((3, 5, 7)):
            probabilities[row, col] = model._classic_knn(sorted_distance, k, row)
    assert_allclose(probabilities, expected, rtol=0.1, atol=0.1)
def test_measure():
    """Fit on a tiny in-memory BMI table and check predict/score on nearby points."""
    # create in-memory dataset
    table = pd.DataFrame.from_dict({
        'height': [1.6, 1.6, 1.62, 1.75, 1.7, 1.8, 1.9],
        'weight': [60, 80, 80, 90, 80, 85, 82],
        'bmi': [1, 0, 0, 0, 0, 1, 1]
    })
    print(table)
    classifier = KNNAlgorithmM(k_neighbours=(1, 3))
    # fit a model on features (height, weight) vs. label (bmi)
    features = table.iloc[:, :2]
    labels = np.ravel(table.iloc[:, 2:3])
    model = classifier.fit(features, labels)
    print(features)
    print(labels)
    # checking first object decision
    prediction = model.predict([[1.6, 60]])
    print(prediction)
    # checking classification score (accuracy here)
    accuracy = model.score([[1.6, 60], [1.59, 61], [1.6, 80]], [1, 1, 0])
    print(accuracy)
    assert accuracy == 1.0
def test_m_dist_equivalence():
    """The optimized pairwise distance must equal the naive per-pair loop.

    Uses the iris dataset as a missing-value-free baseline, so both
    implementations should agree exactly.
    """
    # keyword argument: positional `return_X_y` was deprecated in
    # scikit-learn 0.23 and removed in 1.1 (load_iris(True) raises TypeError)
    X, y = load_iris(return_X_y=True)
    m = KNNAlgorithmM()
    m.fit(X, y)

    def full_distance(test_set, train_set):
        # naive O(n*m) reference: one scalar distance per (test, train) pair
        distance = np.empty(shape=(test_set.shape[0], train_set.shape[0]),
                            dtype=np.float64)
        for i, test in enumerate(test_set):
            for j, train in enumerate(train_set):
                distance[i][j] = m.euclidean_distance_with_missing_values(
                    test, train)
        return distance

    assert_array_equal(
        m.euclidean_distance_with_missing_values_optimized(X, X),
        full_distance(X, X))
def test_computing_euclidean_distance_with_missing_values():
    """Distances with -1 sentinels must match hand-computed expectations.

    Starts from sklearn's plain euclidean_distances, then overwrites every
    entry with the missing-value-aware distance and compares against a table
    computed by hand to three decimal places.
    """
    dummy_train = np.array([[0.2, 0.5, 1.0], [-1, 0.4, 0.1], [0.2, 0.4, 0.4]])
    dummy_test = np.array([[0.8, 0.1, -1], [0.7, 0.2, 0.4], [0.6, -1, 0.8]])
    distance = euclidean_distances(dummy_test, dummy_train)
    print(distance)

    # sanity-check helper: which rows contain the -1 missing-value sentinel
    missing_indexes = KNNAlgorithmM().get_missing_values_indexes(dummy_test)
    print(missing_indexes)
    missing_train_indexes = KNNAlgorithmM().get_missing_values_indexes(
        dummy_train)
    print(missing_train_indexes)

    # recompute the full matrix with the missing-value-aware distance
    for i, test in enumerate(dummy_test):
        for j, train in enumerate(dummy_train):
            distance[i][j] = KNNAlgorithmM(
            ).euclidean_distance_with_missing_values(test, train)
    print(distance)

    expected_distance = np.array([[1.232, 1.240, 0.9],
                                  [0.836, 0.787, 0.538],
                                  [0.67, 1.191, 0.824]])
    # expectations are rounded to 3 decimals, so exact float equality
    # (assert_array_equal) would spuriously fail; compare with tolerance
    assert_allclose(expected_distance, distance, atol=1e-3)
.fit_transform(X) multiclassF = OneVsRestClassifierForRandomBinaryClassifier(KNNAlgorithmF(missing_representation=-1, r=r, aggregation=agg, k_neighbours=k)) f_result = cross_validate(multiclassF, X_missing, y, scoring='roc_auc_ovo', return_estimator=True, cv=10) w = pd.DataFrame({'algorithm': 'f', 'k': str(k), 'r': f_result['estimator'][0].estimator.r, 'agg': Aggregation.change_aggregation_to_name(f_result['estimator'][0].estimator.aggregation), 'missing': miss, 'auc': np.mean(f_result['test_score']), 'stddev': stdev(f_result['test_score'])}, index=[ind]) print(w) dfs.append(w) ind += 1 concatenated = pd.concat(dfs) concatenated = pd.concat(dfs) concatenated.to_excel('concatenated_vertebral.xlsx') for miss in missing: for k in ks: X_missing = MissingValuesInserterColumnsIndependent(columns=range(X.shape[1]), nan_representation=-1, percentage=miss) \ .fit_transform(X) multiclassM = OneVsRestClassifier(KNNAlgorithmM(missing_representation=-1, k_neighbours=k)) m_result = cross_validate(multiclassM, X_missing, y, scoring='roc_auc_ovo', return_estimator=True, cv=10) w2 = pd.DataFrame({'algorithm': 'm', 'k': str(k), 'r': '', 'agg': '', 'missing': miss, 'auc': np.mean(m_result['test_score']), 'stddev': stdev(m_result['test_score'])}, index=[ind]) print(w2) dfs.append(w2) ind += 1 concatenated = pd.concat(dfs) concatenated.to_excel('concatenated_vertebral_m.xlsx')
stdev(f_result['test_score']) }, index=[ind]) print(w) dfs.append(w) ind += 1 concatenated = pd.concat(dfs) concatenated.to_excel('concatenated_iris.xlsx') for miss in missing: for k in ks: X_missing = MissingValuesInserterColumnsIndependent(columns=range(X.shape[1]), nan_representation=-1, percentage=miss) \ .fit_transform(X) multiclassM = OneVsRestClassifier( KNNAlgorithmM(missing_representation=-1, k_neighbours=k)) m_result = cross_validate(multiclassM, X_missing, y, scoring='roc_auc_ovo', return_estimator=True, cv=10) w2 = pd.DataFrame( { 'algorithm': 'm', 'k': str(k), 'r': '', 'agg': '', 'missing': miss, 'auc': np.mean(m_result['test_score']),
def test_sorting():
    """take_k_smallest_and_sort_distances must agree with np.argsort's first k columns."""
    dist_matrix = np.array([[0.8653, 0.6731, 0.9863, 0.3042, 0.9, 0.452],
                            [0.1231, 0.8763, 0.0112, 0.7633, 0.912, 0.341]])
    k = 5
    # reference: indices of the k smallest distances per row, in ascending order
    expected = np.argsort(dist_matrix)[:, :k]
    result = KNNAlgorithmM().take_k_smallest_and_sort_distances(dist_matrix, k)
    assert_array_equal(result, expected)