Пример #1
0
    def test_dbscan(self):
        """Print the accuracy of the project's Dbscan and of the reference DBSCAN.

        Species names in ``self.data["label"]`` are mapped to integer ids,
        ``self.features`` and those ids are split into train/test parts, and
        two accuracies are printed:

        * the project ``Dbscan``: trained on the training part and scored on
          its predictions for the test part;
        * the reference ``DBSCAN`` (presumably ``sklearn.cluster.DBSCAN`` --
          confirm against the file's imports): scored on the samples it
          actually clustered, i.e. the training part.

        The method makes no assertions; the results are only printed.
        """
        # Integer id per species name. Any label not listed here (in the Iris
        # data that is "Iris-virginica") falls back to id 1.
        label_ids = {"Iris-setosa": 2, "Iris-versicolor": 3}
        exact_labels = [label_ids.get(item, 1) for item in self.data["label"]]

        epsilon = 2
        min_pts = 2

        # Split using the ids built above, so the mapping is actually the one
        # the predictions are scored against.
        X_train, X_test, y_train, y_test = train_test_split(self.features,
                                                            exact_labels,
                                                            test_size=0.33,
                                                            random_state=42)

        # --- project implementation: fit on train, predict on test ---
        dbscan = Dbscan(epsilon, min_pts)
        dbscan.load_data(X_train.to_numpy().tolist())
        dbscan.train()
        labels = dbscan.predict(X_test.to_numpy().tolist())

        # zip() pairs values positionally, so this works whether y_test is a
        # plain list or an index-carrying pandas Series.
        accurate_sum = sum(
            1 for predicted, expected in zip(labels, y_test)
            if predicted == expected
        )

        print("Akurasi DBScan: ", accurate_sum / len(labels))

        # --- reference implementation ---
        # fit_predict() labels the samples it was fitted on (X_train), so its
        # output has to be scored against y_train, not y_test.
        clustering_labels = DBSCAN(eps=epsilon,
                                   min_samples=min_pts).fit_predict(X_train)
        # NOTE(review): the +3 offset is kept from the original; with
        # sklearn's convention (noise = -1, clusters from 0) it yields ids
        # starting at 2. Confirm it lines up with the id mapping above.
        clustering_labels = [c + 3 for c in clustering_labels]

        sklearn_accurate_sum = sum(
            1 for predicted, expected in zip(clustering_labels, y_train)
            if predicted == expected
        )

        print("Akurasi DBScan sklearn: ",
              sklearn_accurate_sum / len(clustering_labels))